O3 CPU now handles being used with the sampler.

cpu/o3/2bit_local_pred.cc:
cpu/o3/2bit_local_pred.hh:
cpu/o3/bpred_unit.hh:
cpu/o3/bpred_unit_impl.hh:
cpu/o3/btb.cc:
cpu/o3/btb.hh:
cpu/o3/commit.hh:
cpu/o3/commit_impl.hh:
cpu/o3/cpu.cc:
cpu/o3/cpu.hh:
cpu/o3/decode.hh:
cpu/o3/decode_impl.hh:
cpu/o3/fetch.hh:
cpu/o3/fetch_impl.hh:
cpu/o3/fu_pool.cc:
cpu/o3/fu_pool.hh:
cpu/o3/iew.hh:
cpu/o3/iew_impl.hh:
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
cpu/o3/lsq.hh:
cpu/o3/lsq_impl.hh:
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
cpu/o3/mem_dep_unit.hh:
cpu/o3/mem_dep_unit_impl.hh:
cpu/o3/ras.cc:
cpu/o3/ras.hh:
cpu/o3/rename.hh:
cpu/o3/rename_impl.hh:
cpu/o3/rob.hh:
cpu/o3/rob_impl.hh:
cpu/o3/sat_counter.cc:
cpu/o3/sat_counter.hh:
cpu/o3/thread_state.hh:
    Handle switching out and taking over. Needs to be able to reset all state.
cpu/o3/alpha_cpu_impl.hh:
    Handle taking over from another XC.

--HG--
extra : convert_revision : b936e826f0f8a18319bfa940ff35097b4192b449
This commit is contained in:
parent
4601230d35
commit
f3358e5f7b
|
@ -67,6 +67,14 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
|
||||||
instShiftAmt);
|
instShiftAmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
DefaultBP::reset()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < localPredictorSets; ++i) {
|
||||||
|
localCtrs[i].reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
DefaultBP::lookup(Addr &branch_addr)
|
DefaultBP::lookup(Addr &branch_addr)
|
||||||
{
|
{
|
||||||
|
|
|
@ -62,6 +62,8 @@ class DefaultBP
|
||||||
*/
|
*/
|
||||||
void update(Addr &branch_addr, bool taken);
|
void update(Addr &branch_addr, bool taken);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -151,6 +151,26 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
|
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
|
||||||
{
|
{
|
||||||
|
// some things should already be set up
|
||||||
|
assert(getMemPtr() == old_context->getMemPtr());
|
||||||
|
#if FULL_SYSTEM
|
||||||
|
assert(getSystemPtr() == old_context->getSystemPtr());
|
||||||
|
#else
|
||||||
|
assert(getProcessPtr() == old_context->getProcessPtr());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// copy over functional state
|
||||||
|
setStatus(old_context->status());
|
||||||
|
copyArchRegs(old_context);
|
||||||
|
setCpuId(old_context->readCpuId());
|
||||||
|
#if !FULL_SYSTEM
|
||||||
|
thread->funcExeInst = old_context->readFuncExeInst();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
old_context->setStatus(ExecContext::Unallocated);
|
||||||
|
|
||||||
|
thread->inSyscall = false;
|
||||||
|
thread->trapPending = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -67,6 +67,10 @@ class TwobitBPredUnit
|
||||||
*/
|
*/
|
||||||
void regStats();
|
void regStats();
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Predicts whether or not the instruction is a taken branch, and the
|
* Predicts whether or not the instruction is a taken branch, and the
|
||||||
* target of the branch if it is taken.
|
* target of the branch if it is taken.
|
||||||
|
|
|
@ -94,6 +94,26 @@ TwobitBPredUnit<Impl>::regStats()
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
TwobitBPredUnit<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
|
predHist[i].clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
TwobitBPredUnit<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < Impl::MaxThreads; ++i)
|
||||||
|
RAS[i].reset();
|
||||||
|
|
||||||
|
BP.reset();
|
||||||
|
BTB.reset();
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
bool
|
bool
|
||||||
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
|
@ -297,5 +317,6 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||||
BP.update(pred_hist.front().PC, actually_taken);
|
BP.update(pred_hist.front().PC, actually_taken);
|
||||||
|
|
||||||
BTB.update(pred_hist.front().PC, corr_target, tid);
|
BTB.update(pred_hist.front().PC, corr_target, tid);
|
||||||
|
pred_hist.pop_front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,14 @@ DefaultBTB::DefaultBTB(unsigned _numEntries,
|
||||||
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
|
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
DefaultBTB::reset()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < numEntries; ++i) {
|
||||||
|
btb[i].valid = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
unsigned
|
unsigned
|
||||||
DefaultBTB::getIndex(const Addr &inst_PC)
|
DefaultBTB::getIndex(const Addr &inst_PC)
|
||||||
|
|
|
@ -65,6 +65,8 @@ class DefaultBTB
|
||||||
DefaultBTB(unsigned numEntries, unsigned tagBits,
|
DefaultBTB(unsigned numEntries, unsigned tagBits,
|
||||||
unsigned instShiftAmt);
|
unsigned instShiftAmt);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
|
||||||
/** Looks up an address in the BTB. Must call valid() first on the address.
|
/** Looks up an address in the BTB. Must call valid() first on the address.
|
||||||
* @param inst_PC The address of the branch to look up.
|
* @param inst_PC The address of the branch to look up.
|
||||||
* @param tid The thread id.
|
* @param tid The thread id.
|
||||||
|
|
|
@ -175,6 +175,10 @@ class DefaultCommit
|
||||||
/** Initializes stage by sending back the number of free entries. */
|
/** Initializes stage by sending back the number of free entries. */
|
||||||
void initStage();
|
void initStage();
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Ticks the commit stage, which tries to commit instructions. */
|
/** Ticks the commit stage, which tries to commit instructions. */
|
||||||
void tick();
|
void tick();
|
||||||
|
|
||||||
|
@ -351,6 +355,8 @@ class DefaultCommit
|
||||||
/** Number of Active Threads */
|
/** Number of Active Threads */
|
||||||
unsigned numThreads;
|
unsigned numThreads;
|
||||||
|
|
||||||
|
bool switchedOut;
|
||||||
|
|
||||||
Tick trapLatency;
|
Tick trapLatency;
|
||||||
|
|
||||||
Tick fetchTrapLatency;
|
Tick fetchTrapLatency;
|
||||||
|
|
|
@ -54,6 +54,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultCommit<Impl>::TrapEvent::process()
|
DefaultCommit<Impl>::TrapEvent::process()
|
||||||
{
|
{
|
||||||
|
// This will get reset if it was switched out.
|
||||||
commit->trapSquash[tid] = true;
|
commit->trapSquash[tid] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,7 +76,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
|
||||||
renameWidth(params->renameWidth),
|
renameWidth(params->renameWidth),
|
||||||
iewWidth(params->executeWidth),
|
iewWidth(params->executeWidth),
|
||||||
commitWidth(params->commitWidth),
|
commitWidth(params->commitWidth),
|
||||||
numThreads(params->numberOfThreads)
|
numThreads(params->numberOfThreads),
|
||||||
|
switchedOut(false)
|
||||||
{
|
{
|
||||||
_status = Active;
|
_status = Active;
|
||||||
_nextStatus = Inactive;
|
_nextStatus = Inactive;
|
||||||
|
@ -254,6 +256,9 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
|
||||||
// Commit must broadcast the number of free entries it has at the start of
|
// Commit must broadcast the number of free entries it has at the start of
|
||||||
// the simulation, so it starts as active.
|
// the simulation, so it starts as active.
|
||||||
cpu->activateStage(FullCPU::CommitIdx);
|
cpu->activateStage(FullCPU::CommitIdx);
|
||||||
|
|
||||||
|
trapLatency = cpu->cycles(6);
|
||||||
|
fetchTrapLatency = cpu->cycles(12);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -360,6 +365,29 @@ DefaultCommit<Impl>::initStage()
|
||||||
cpu->activityThisCycle();
|
cpu->activityThisCycle();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultCommit<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
rob->switchOut();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultCommit<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
_status = Active;
|
||||||
|
_nextStatus = Inactive;
|
||||||
|
for (int i=0; i < numThreads; i++) {
|
||||||
|
commitStatus[i] = Idle;
|
||||||
|
changedROBNumEntries[i] = false;
|
||||||
|
trapSquash[i] = false;
|
||||||
|
xcSquash[i] = false;
|
||||||
|
}
|
||||||
|
squashCounter = 0;
|
||||||
|
rob->takeOverFrom();
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultCommit<Impl>::updateStatus()
|
DefaultCommit<Impl>::updateStatus()
|
||||||
|
@ -719,8 +747,9 @@ DefaultCommit<Impl>::commit()
|
||||||
while (threads != (*activeThreads).end()) {
|
while (threads != (*activeThreads).end()) {
|
||||||
unsigned tid = *threads++;
|
unsigned tid = *threads++;
|
||||||
|
|
||||||
if (fromFetch->fetchFault) {
|
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
|
||||||
// Record the fault. Wait until it's empty in the ROB. Then handle the trap.
|
// Record the fault. Wait until it's empty in the ROB. Then handle the trap.
|
||||||
|
// Ignore it if there's already a trap pending as fetch will be redirected.
|
||||||
fetchFault = fromFetch->fetchFault;
|
fetchFault = fromFetch->fetchFault;
|
||||||
fetchFaultSN = fromFetch->fetchFaultSN;
|
fetchFaultSN = fromFetch->fetchFaultSN;
|
||||||
fetchFaultTick = curTick + fetchTrapLatency;
|
fetchFaultTick = curTick + fetchTrapLatency;
|
||||||
|
@ -975,6 +1004,7 @@ DefaultCommit<Impl>::commitInsts()
|
||||||
}
|
}
|
||||||
|
|
||||||
PC[tid] = nextPC[tid];
|
PC[tid] = nextPC[tid];
|
||||||
|
nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
int count = 0;
|
int count = 0;
|
||||||
Addr oldpc;
|
Addr oldpc;
|
||||||
|
@ -1002,6 +1032,10 @@ DefaultCommit<Impl>::commitInsts()
|
||||||
|
|
||||||
DPRINTF(CommitRate, "%i\n", num_committed);
|
DPRINTF(CommitRate, "%i\n", num_committed);
|
||||||
numCommittedDist.sample(num_committed);
|
numCommittedDist.sample(num_committed);
|
||||||
|
|
||||||
|
if (num_committed == commitWidth) {
|
||||||
|
commit_eligible[0]++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -124,6 +124,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
|
||||||
mem(params->mem),
|
mem(params->mem),
|
||||||
#else
|
#else
|
||||||
// pTable(params->pTable),
|
// pTable(params->pTable),
|
||||||
|
mem(params->workload[0]->getMemory()),
|
||||||
#endif // FULL_SYSTEM
|
#endif // FULL_SYSTEM
|
||||||
|
|
||||||
icacheInterface(params->icacheInterface),
|
icacheInterface(params->icacheInterface),
|
||||||
|
@ -176,9 +177,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
|
||||||
numThreads = number_of_threads;
|
numThreads = number_of_threads;
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
int activeThreads = params->workload.size();
|
int active_threads = params->workload.size();
|
||||||
#else
|
#else
|
||||||
int activeThreads = 1;
|
int active_threads = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
|
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
|
||||||
|
@ -192,7 +193,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
|
||||||
PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
|
PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
|
||||||
|
|
||||||
for (int tid=0; tid < numThreads; tid++) {
|
for (int tid=0; tid < numThreads; tid++) {
|
||||||
bool bindRegs = (tid <= activeThreads - 1);
|
bool bindRegs = (tid <= active_threads - 1);
|
||||||
|
|
||||||
commitRenameMap[tid].init(TheISA::NumIntRegs,
|
commitRenameMap[tid].init(TheISA::NumIntRegs,
|
||||||
params->numPhysIntRegs,
|
params->numPhysIntRegs,
|
||||||
|
@ -357,7 +358,7 @@ FullO3CPU<Impl>::tick()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (activityCount && !tickEvent.scheduled()) {
|
if (activityCount && !tickEvent.scheduled()) {
|
||||||
tickEvent.schedule(curTick + 1);
|
tickEvent.schedule(curTick + cycles(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
|
@ -370,8 +371,8 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
FullO3CPU<Impl>::init()
|
FullO3CPU<Impl>::init()
|
||||||
{
|
{
|
||||||
if (deferRegistration) {
|
if (!deferRegistration) {
|
||||||
return;
|
registerExecContexts();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set inSyscall so that the CPU doesn't squash when initially
|
// Set inSyscall so that the CPU doesn't squash when initially
|
||||||
|
@ -379,7 +380,6 @@ FullO3CPU<Impl>::init()
|
||||||
for (int i = 0; i < number_of_threads; ++i)
|
for (int i = 0; i < number_of_threads; ++i)
|
||||||
thread[i]->inSyscall = true;
|
thread[i]->inSyscall = true;
|
||||||
|
|
||||||
registerExecContexts();
|
|
||||||
|
|
||||||
// Need to do a copy of the xc->regs into the CPU's regfile so
|
// Need to do a copy of the xc->regs into the CPU's regfile so
|
||||||
// that it can start properly.
|
// that it can start properly.
|
||||||
|
@ -388,7 +388,7 @@ FullO3CPU<Impl>::init()
|
||||||
// Need to do a copy of the xc->regs into the CPU's regfile so
|
// Need to do a copy of the xc->regs into the CPU's regfile so
|
||||||
// that it can start properly.
|
// that it can start properly.
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
ExecContext *src_xc = system->execContexts[tid];
|
ExecContext *src_xc = execContexts[tid];
|
||||||
#else
|
#else
|
||||||
ExecContext *src_xc = thread[tid]->getXCProxy();
|
ExecContext *src_xc = thread[tid]->getXCProxy();
|
||||||
#endif
|
#endif
|
||||||
|
@ -584,7 +584,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
|
||||||
activeThreads.push_back(tid);
|
activeThreads.push_back(tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(_status == Idle);
|
assert(_status == Idle || _status == SwitchedOut);
|
||||||
|
|
||||||
scheduleTickEvent(delay);
|
scheduleTickEvent(delay);
|
||||||
|
|
||||||
|
@ -658,21 +658,64 @@ FullO3CPU<Impl>::haltContext(int tid)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
FullO3CPU<Impl>::switchOut()
|
FullO3CPU<Impl>::switchOut(Sampler *sampler)
|
||||||
{
|
{
|
||||||
panic("FullO3CPU does not have a switch out function.\n");
|
// panic("FullO3CPU does not have a switch out function.\n");
|
||||||
|
fetch.switchOut();
|
||||||
|
decode.switchOut();
|
||||||
|
rename.switchOut();
|
||||||
|
iew.switchOut();
|
||||||
|
commit.switchOut();
|
||||||
|
if (tickEvent.scheduled())
|
||||||
|
tickEvent.squash();
|
||||||
|
sampler->signalSwitched();
|
||||||
|
_status = SwitchedOut;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 6; ++i) {
|
||||||
|
timeBuffer.advance();
|
||||||
|
fetchQueue.advance();
|
||||||
|
decodeQueue.advance();
|
||||||
|
renameQueue.advance();
|
||||||
|
iewQueue.advance();
|
||||||
|
activityBuffer.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
activityCount = 0;
|
||||||
|
bzero(&stageActive, sizeof(stageActive));
|
||||||
|
|
||||||
BaseCPU::takeOverFrom(oldCPU);
|
BaseCPU::takeOverFrom(oldCPU);
|
||||||
|
|
||||||
|
fetch.takeOverFrom();
|
||||||
|
decode.takeOverFrom();
|
||||||
|
rename.takeOverFrom();
|
||||||
|
iew.takeOverFrom();
|
||||||
|
commit.takeOverFrom();
|
||||||
|
|
||||||
assert(!tickEvent.scheduled());
|
assert(!tickEvent.scheduled());
|
||||||
|
|
||||||
|
// @todo: Figure out how to properly select the tid to put onto the active threads list.
|
||||||
|
int tid = 0;
|
||||||
|
|
||||||
|
list<unsigned>::iterator isActive = find(
|
||||||
|
activeThreads.begin(), activeThreads.end(), tid);
|
||||||
|
|
||||||
|
if (isActive == activeThreads.end()) {
|
||||||
|
//May Need to Re-code this if the delay variable is the
|
||||||
|
//delay needed for thread to activate
|
||||||
|
DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
|
||||||
|
tid);
|
||||||
|
|
||||||
|
activeThreads.push_back(tid);
|
||||||
|
}
|
||||||
|
|
||||||
// Set all status's to active, schedule the
|
// Set all status's to active, schedule the
|
||||||
// CPU's tick event.
|
// CPU's tick event.
|
||||||
|
// @todo: Fix up statuses so this is handled properly
|
||||||
for (int i = 0; i < execContexts.size(); ++i) {
|
for (int i = 0; i < execContexts.size(); ++i) {
|
||||||
ExecContext *xc = execContexts[i];
|
ExecContext *xc = execContexts[i];
|
||||||
if (xc->status() == ExecContext::Active && _status != Running) {
|
if (xc->status() == ExecContext::Active && _status != Running) {
|
||||||
|
@ -680,6 +723,8 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||||
tickEvent.schedule(curTick);
|
tickEvent.schedule(curTick);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!tickEvent.scheduled())
|
||||||
|
tickEvent.schedule(curTick);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -758,7 +803,8 @@ template <class Impl>
|
||||||
float
|
float
|
||||||
FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
|
FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
|
||||||
{
|
{
|
||||||
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
|
int idx = reg_idx + TheISA::FP_Base_DepTag;
|
||||||
|
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
|
||||||
|
|
||||||
return regFile.readFloatRegSingle(phys_reg);
|
return regFile.readFloatRegSingle(phys_reg);
|
||||||
}
|
}
|
||||||
|
@ -767,7 +813,8 @@ template <class Impl>
|
||||||
double
|
double
|
||||||
FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
|
FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
|
||||||
{
|
{
|
||||||
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
|
int idx = reg_idx + TheISA::FP_Base_DepTag;
|
||||||
|
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
|
||||||
|
|
||||||
return regFile.readFloatRegDouble(phys_reg);
|
return regFile.readFloatRegDouble(phys_reg);
|
||||||
}
|
}
|
||||||
|
@ -776,7 +823,8 @@ template <class Impl>
|
||||||
uint64_t
|
uint64_t
|
||||||
FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
|
FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
|
||||||
{
|
{
|
||||||
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
|
int idx = reg_idx + TheISA::FP_Base_DepTag;
|
||||||
|
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
|
||||||
|
|
||||||
return regFile.readFloatRegInt(phys_reg);
|
return regFile.readFloatRegInt(phys_reg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,7 +82,8 @@ class FullO3CPU : public BaseFullCPU
|
||||||
Running,
|
Running,
|
||||||
Idle,
|
Idle,
|
||||||
Halted,
|
Halted,
|
||||||
Blocked
|
Blocked,
|
||||||
|
SwitchedOut
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Overall CPU status. */
|
/** Overall CPU status. */
|
||||||
|
@ -112,9 +113,9 @@ class FullO3CPU : public BaseFullCPU
|
||||||
void scheduleTickEvent(int delay)
|
void scheduleTickEvent(int delay)
|
||||||
{
|
{
|
||||||
if (tickEvent.squashed())
|
if (tickEvent.squashed())
|
||||||
tickEvent.reschedule(curTick + delay);
|
tickEvent.reschedule(curTick + cycles(delay));
|
||||||
else if (!tickEvent.scheduled())
|
else if (!tickEvent.scheduled())
|
||||||
tickEvent.schedule(curTick + delay);
|
tickEvent.schedule(curTick + cycles(delay));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Unschedule tick event, regardless of its current state. */
|
/** Unschedule tick event, regardless of its current state. */
|
||||||
|
@ -196,7 +197,7 @@ class FullO3CPU : public BaseFullCPU
|
||||||
/** Switches out this CPU.
|
/** Switches out this CPU.
|
||||||
* @todo: Implement this.
|
* @todo: Implement this.
|
||||||
*/
|
*/
|
||||||
void switchOut();
|
void switchOut(Sampler *sampler);
|
||||||
|
|
||||||
/** Takes over from another CPU.
|
/** Takes over from another CPU.
|
||||||
* @todo: Implement this.
|
* @todo: Implement this.
|
||||||
|
|
|
@ -107,6 +107,9 @@ class DefaultDecode
|
||||||
/** Sets pointer to list of active threads. */
|
/** Sets pointer to list of active threads. */
|
||||||
void setActiveThreads(std::list<unsigned> *at_ptr);
|
void setActiveThreads(std::list<unsigned> *at_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
/** Ticks decode, processing all input signals and decoding as many
|
/** Ticks decode, processing all input signals and decoding as many
|
||||||
* instructions as possible.
|
* instructions as possible.
|
||||||
*/
|
*/
|
||||||
|
@ -272,6 +275,8 @@ class DefaultDecode
|
||||||
Stats::Scalar<> decodeUnblockCycles;
|
Stats::Scalar<> decodeUnblockCycles;
|
||||||
/** Stat for total number of squashing cycles. */
|
/** Stat for total number of squashing cycles. */
|
||||||
Stats::Scalar<> decodeSquashCycles;
|
Stats::Scalar<> decodeSquashCycles;
|
||||||
|
/** Stat for number of times a branch is resolved at decode. */
|
||||||
|
Stats::Scalar<> decodeBranchResolved;
|
||||||
/** Stat for number of times a branch mispredict is detected. */
|
/** Stat for number of times a branch mispredict is detected. */
|
||||||
Stats::Scalar<> decodeBranchMispred;
|
Stats::Scalar<> decodeBranchMispred;
|
||||||
/** Stat for number of times decode detected a non-control instruction
|
/** Stat for number of times decode detected a non-control instruction
|
||||||
|
|
|
@ -66,40 +66,44 @@ void
|
||||||
DefaultDecode<Impl>::regStats()
|
DefaultDecode<Impl>::regStats()
|
||||||
{
|
{
|
||||||
decodeIdleCycles
|
decodeIdleCycles
|
||||||
.name(name() + ".decodeIdleCycles")
|
.name(name() + ".DECODE:IdleCycles")
|
||||||
.desc("Number of cycles decode is idle")
|
.desc("Number of cycles decode is idle")
|
||||||
.prereq(decodeIdleCycles);
|
.prereq(decodeIdleCycles);
|
||||||
decodeBlockedCycles
|
decodeBlockedCycles
|
||||||
.name(name() + ".decodeBlockedCycles")
|
.name(name() + ".DECODE:BlockedCycles")
|
||||||
.desc("Number of cycles decode is blocked")
|
.desc("Number of cycles decode is blocked")
|
||||||
.prereq(decodeBlockedCycles);
|
.prereq(decodeBlockedCycles);
|
||||||
decodeRunCycles
|
decodeRunCycles
|
||||||
.name(name() + ".decodeRunCycles")
|
.name(name() + ".DECODE:RunCycles")
|
||||||
.desc("Number of cycles decode is running")
|
.desc("Number of cycles decode is running")
|
||||||
.prereq(decodeRunCycles);
|
.prereq(decodeRunCycles);
|
||||||
decodeUnblockCycles
|
decodeUnblockCycles
|
||||||
.name(name() + ".decodeUnblockCycles")
|
.name(name() + ".DECODE:UnblockCycles")
|
||||||
.desc("Number of cycles decode is unblocking")
|
.desc("Number of cycles decode is unblocking")
|
||||||
.prereq(decodeUnblockCycles);
|
.prereq(decodeUnblockCycles);
|
||||||
decodeSquashCycles
|
decodeSquashCycles
|
||||||
.name(name() + ".decodeSquashCycles")
|
.name(name() + ".DECODE:SquashCycles")
|
||||||
.desc("Number of cycles decode is squashing")
|
.desc("Number of cycles decode is squashing")
|
||||||
.prereq(decodeSquashCycles);
|
.prereq(decodeSquashCycles);
|
||||||
|
decodeBranchResolved
|
||||||
|
.name(name() + ".DECODE:BranchResolved")
|
||||||
|
.desc("Number of times decode resolved a branch")
|
||||||
|
.prereq(decodeBranchResolved);
|
||||||
decodeBranchMispred
|
decodeBranchMispred
|
||||||
.name(name() + ".decodeBranchMispred")
|
.name(name() + ".DECODE:BranchMispred")
|
||||||
.desc("Number of times decode detected a branch misprediction")
|
.desc("Number of times decode detected a branch misprediction")
|
||||||
.prereq(decodeBranchMispred);
|
.prereq(decodeBranchMispred);
|
||||||
decodeControlMispred
|
decodeControlMispred
|
||||||
.name(name() + ".decodeControlMispred")
|
.name(name() + ".DECODE:ControlMispred")
|
||||||
.desc("Number of times decode detected an instruction incorrectly"
|
.desc("Number of times decode detected an instruction incorrectly"
|
||||||
" predicted as a control")
|
" predicted as a control")
|
||||||
.prereq(decodeControlMispred);
|
.prereq(decodeControlMispred);
|
||||||
decodeDecodedInsts
|
decodeDecodedInsts
|
||||||
.name(name() + ".decodeDecodedInsts")
|
.name(name() + ".DECODE:DecodedInsts")
|
||||||
.desc("Number of instructions handled by decode")
|
.desc("Number of instructions handled by decode")
|
||||||
.prereq(decodeDecodedInsts);
|
.prereq(decodeDecodedInsts);
|
||||||
decodeSquashedInsts
|
decodeSquashedInsts
|
||||||
.name(name() + ".decodeSquashedInsts")
|
.name(name() + ".DECODE:SquashedInsts")
|
||||||
.desc("Number of squashed instructions handled by decode")
|
.desc("Number of squashed instructions handled by decode")
|
||||||
.prereq(decodeSquashedInsts);
|
.prereq(decodeSquashedInsts);
|
||||||
}
|
}
|
||||||
|
@ -158,6 +162,33 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
|
||||||
activeThreads = at_ptr;
|
activeThreads = at_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultDecode<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultDecode<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
_status = Inactive;
|
||||||
|
|
||||||
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
|
decodeStatus[i] = Idle;
|
||||||
|
|
||||||
|
stalls[i].rename = false;
|
||||||
|
stalls[i].iew = false;
|
||||||
|
stalls[i].commit = false;
|
||||||
|
while (!insts[i].empty())
|
||||||
|
insts[i].pop();
|
||||||
|
while (!skidBuffer[i].empty())
|
||||||
|
skidBuffer[i].pop();
|
||||||
|
branchCount[i] = 0;
|
||||||
|
}
|
||||||
|
wroteToTimeBuffer = false;
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
bool
|
bool
|
||||||
DefaultDecode<Impl>::checkStall(unsigned tid) const
|
DefaultDecode<Impl>::checkStall(unsigned tid) const
|
||||||
|
@ -680,6 +711,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
|
||||||
|
|
||||||
// Go ahead and compute any PC-relative branches.
|
// Go ahead and compute any PC-relative branches.
|
||||||
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
|
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
|
||||||
|
++decodeBranchResolved;
|
||||||
inst->setNextPC(inst->branchTarget());
|
inst->setNextPC(inst->branchTarget());
|
||||||
|
|
||||||
if (inst->mispredicted()) {
|
if (inst->mispredicted()) {
|
||||||
|
|
|
@ -35,6 +35,8 @@
|
||||||
#include "mem/mem_interface.hh"
|
#include "mem/mem_interface.hh"
|
||||||
#include "sim/eventq.hh"
|
#include "sim/eventq.hh"
|
||||||
|
|
||||||
|
class Sampler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DefaultFetch class handles both single threaded and SMT fetch. Its width is
|
* DefaultFetch class handles both single threaded and SMT fetch. Its width is
|
||||||
* specified by the parameters; each cycle it tries to fetch that many
|
* specified by the parameters; each cycle it tries to fetch that many
|
||||||
|
@ -81,6 +83,7 @@ class DefaultFetch
|
||||||
Fetching,
|
Fetching,
|
||||||
TrapPending,
|
TrapPending,
|
||||||
QuiescePending,
|
QuiescePending,
|
||||||
|
SwitchOut,
|
||||||
IcacheMissStall,
|
IcacheMissStall,
|
||||||
IcacheMissComplete
|
IcacheMissComplete
|
||||||
};
|
};
|
||||||
|
@ -160,6 +163,12 @@ class DefaultFetch
|
||||||
/** Processes cache completion event. */
|
/** Processes cache completion event. */
|
||||||
void processCacheCompletion(MemReqPtr &req);
|
void processCacheCompletion(MemReqPtr &req);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
void wakeFromQuiesce();
|
void wakeFromQuiesce();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -360,6 +369,8 @@ class DefaultFetch
|
||||||
|
|
||||||
bool interruptPending;
|
bool interruptPending;
|
||||||
|
|
||||||
|
bool switchedOut;
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
/** Page table pointer. */
|
/** Page table pointer. */
|
||||||
// PageTable *pTable;
|
// PageTable *pTable;
|
||||||
|
@ -382,6 +393,8 @@ class DefaultFetch
|
||||||
*/
|
*/
|
||||||
Stats::Scalar<> fetchIdleCycles;
|
Stats::Scalar<> fetchIdleCycles;
|
||||||
Stats::Scalar<> fetchBlockedCycles;
|
Stats::Scalar<> fetchBlockedCycles;
|
||||||
|
|
||||||
|
Stats::Scalar<> fetchMiscStallCycles;
|
||||||
/** Stat for total number of fetched cache lines. */
|
/** Stat for total number of fetched cache lines. */
|
||||||
Stats::Scalar<> fetchedCacheLines;
|
Stats::Scalar<> fetchedCacheLines;
|
||||||
|
|
||||||
|
|
|
@ -169,53 +169,59 @@ void
|
||||||
DefaultFetch<Impl>::regStats()
|
DefaultFetch<Impl>::regStats()
|
||||||
{
|
{
|
||||||
icacheStallCycles
|
icacheStallCycles
|
||||||
.name(name() + ".icacheStallCycles")
|
.name(name() + ".FETCH:icacheStallCycles")
|
||||||
.desc("Number of cycles fetch is stalled on an Icache miss")
|
.desc("Number of cycles fetch is stalled on an Icache miss")
|
||||||
.prereq(icacheStallCycles);
|
.prereq(icacheStallCycles);
|
||||||
|
|
||||||
fetchedInsts
|
fetchedInsts
|
||||||
.name(name() + ".fetchedInsts")
|
.name(name() + ".FETCH:Insts")
|
||||||
.desc("Number of instructions fetch has processed")
|
.desc("Number of instructions fetch has processed")
|
||||||
.prereq(fetchedInsts);
|
.prereq(fetchedInsts);
|
||||||
|
|
||||||
fetchedBranches
|
fetchedBranches
|
||||||
.name(name() + ".fetchedBranches")
|
.name(name() + ".FETCH:Branches")
|
||||||
.desc("Number of branches that fetch encountered")
|
.desc("Number of branches that fetch encountered")
|
||||||
.prereq(fetchedBranches);
|
.prereq(fetchedBranches);
|
||||||
|
|
||||||
predictedBranches
|
predictedBranches
|
||||||
.name(name() + ".predictedBranches")
|
.name(name() + ".FETCH:predictedBranches")
|
||||||
.desc("Number of branches that fetch has predicted taken")
|
.desc("Number of branches that fetch has predicted taken")
|
||||||
.prereq(predictedBranches);
|
.prereq(predictedBranches);
|
||||||
|
|
||||||
fetchCycles
|
fetchCycles
|
||||||
.name(name() + ".fetchCycles")
|
.name(name() + ".FETCH:Cycles")
|
||||||
.desc("Number of cycles fetch has run and was not squashing or"
|
.desc("Number of cycles fetch has run and was not squashing or"
|
||||||
" blocked")
|
" blocked")
|
||||||
.prereq(fetchCycles);
|
.prereq(fetchCycles);
|
||||||
|
|
||||||
fetchSquashCycles
|
fetchSquashCycles
|
||||||
.name(name() + ".fetchSquashCycles")
|
.name(name() + ".FETCH:SquashCycles")
|
||||||
.desc("Number of cycles fetch has spent squashing")
|
.desc("Number of cycles fetch has spent squashing")
|
||||||
.prereq(fetchSquashCycles);
|
.prereq(fetchSquashCycles);
|
||||||
|
|
||||||
fetchIdleCycles
|
fetchIdleCycles
|
||||||
.name(name() + ".fetchIdleCycles")
|
.name(name() + ".FETCH:IdleCycles")
|
||||||
.desc("Number of cycles fetch was idle")
|
.desc("Number of cycles fetch was idle")
|
||||||
.prereq(fetchIdleCycles);
|
.prereq(fetchIdleCycles);
|
||||||
|
|
||||||
fetchBlockedCycles
|
fetchBlockedCycles
|
||||||
.name(name() + ".fetchBlockedCycles")
|
.name(name() + ".FETCH:BlockedCycles")
|
||||||
.desc("Number of cycles fetch has spent blocked")
|
.desc("Number of cycles fetch has spent blocked")
|
||||||
.prereq(fetchBlockedCycles);
|
.prereq(fetchBlockedCycles);
|
||||||
|
|
||||||
fetchedCacheLines
|
fetchedCacheLines
|
||||||
.name(name() + ".fetchedCacheLines")
|
.name(name() + ".FETCH:CacheLines")
|
||||||
.desc("Number of cache lines fetched")
|
.desc("Number of cache lines fetched")
|
||||||
.prereq(fetchedCacheLines);
|
.prereq(fetchedCacheLines);
|
||||||
|
|
||||||
|
fetchMiscStallCycles
|
||||||
|
.name(name() + ".FETCH:MiscStallCycles")
|
||||||
|
.desc("Number of cycles fetch has spent waiting on interrupts, or "
|
||||||
|
"bad addresses, or out of MSHRs")
|
||||||
|
.prereq(fetchMiscStallCycles);
|
||||||
|
|
||||||
fetchIcacheSquashes
|
fetchIcacheSquashes
|
||||||
.name(name() + ".fetchIcacheSquashes")
|
.name(name() + ".FETCH:IcacheSquashes")
|
||||||
.desc("Number of outstanding Icache misses that were squashed")
|
.desc("Number of outstanding Icache misses that were squashed")
|
||||||
.prereq(fetchIcacheSquashes);
|
.prereq(fetchIcacheSquashes);
|
||||||
|
|
||||||
|
@ -223,24 +229,24 @@ DefaultFetch<Impl>::regStats()
|
||||||
.init(/* base value */ 0,
|
.init(/* base value */ 0,
|
||||||
/* last value */ fetchWidth,
|
/* last value */ fetchWidth,
|
||||||
/* bucket size */ 1)
|
/* bucket size */ 1)
|
||||||
.name(name() + ".rateDist")
|
.name(name() + ".FETCH:rateDist")
|
||||||
.desc("Number of instructions fetched each cycle (Total)")
|
.desc("Number of instructions fetched each cycle (Total)")
|
||||||
.flags(Stats::pdf);
|
.flags(Stats::pdf);
|
||||||
|
|
||||||
idleRate
|
idleRate
|
||||||
.name(name() + ".idleRate")
|
.name(name() + ".FETCH:idleRate")
|
||||||
.desc("Percent of cycles fetch was idle")
|
.desc("Percent of cycles fetch was idle")
|
||||||
.prereq(idleRate);
|
.prereq(idleRate);
|
||||||
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
|
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
|
||||||
|
|
||||||
branchRate
|
branchRate
|
||||||
.name(name() + ".branchRate")
|
.name(name() + ".FETCH:branchRate")
|
||||||
.desc("Number of branch fetches per cycle")
|
.desc("Number of branch fetches per cycle")
|
||||||
.flags(Stats::total);
|
.flags(Stats::total);
|
||||||
branchRate = predictedBranches / cpu->numCycles;
|
branchRate = predictedBranches / cpu->numCycles;
|
||||||
|
|
||||||
fetchRate
|
fetchRate
|
||||||
.name(name() + ".rate")
|
.name(name() + ".FETCH:rate")
|
||||||
.desc("Number of inst fetches per cycle")
|
.desc("Number of inst fetches per cycle")
|
||||||
.flags(Stats::total);
|
.flags(Stats::total);
|
||||||
fetchRate = fetchedInsts / cpu->numCycles;
|
fetchRate = fetchedInsts / cpu->numCycles;
|
||||||
|
@ -332,7 +338,8 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
|
||||||
// Can keep track of how many cache accesses go unused due to
|
// Can keep track of how many cache accesses go unused due to
|
||||||
// misspeculation here.
|
// misspeculation here.
|
||||||
if (fetchStatus[tid] != IcacheMissStall ||
|
if (fetchStatus[tid] != IcacheMissStall ||
|
||||||
req != memReq[tid]) {
|
req != memReq[tid] ||
|
||||||
|
isSwitchedOut()) {
|
||||||
++fetchIcacheSquashes;
|
++fetchIcacheSquashes;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -360,6 +367,35 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
|
||||||
// memReq[tid]->completionEvent = NULL;
|
// memReq[tid]->completionEvent = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultFetch<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
switchedOut = true;
|
||||||
|
branchPred.switchOut();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultFetch<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
// Reset all state
|
||||||
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
|
stalls[i].decode = 0;
|
||||||
|
stalls[i].rename = 0;
|
||||||
|
stalls[i].iew = 0;
|
||||||
|
stalls[i].commit = 0;
|
||||||
|
PC[i] = cpu->readPC(i);
|
||||||
|
nextPC[i] = cpu->readNextPC(i);
|
||||||
|
fetchStatus[i] = Running;
|
||||||
|
}
|
||||||
|
numInst = 0;
|
||||||
|
wroteToTimeBuffer = false;
|
||||||
|
_status = Inactive;
|
||||||
|
switchedOut = false;
|
||||||
|
branchPred.takeOverFrom();
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultFetch<Impl>::wakeFromQuiesce()
|
DefaultFetch<Impl>::wakeFromQuiesce()
|
||||||
|
@ -902,8 +938,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||||
tid, fetch_PC);
|
tid, fetch_PC);
|
||||||
|
|
||||||
bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
|
bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
|
||||||
if (!fetch_success)
|
if (!fetch_success) {
|
||||||
|
++fetchMiscStallCycles;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (fetchStatus[tid] == Idle) {
|
if (fetchStatus[tid] == Idle) {
|
||||||
++fetchIdleCycles;
|
++fetchIdleCycles;
|
||||||
|
|
|
@ -242,6 +242,20 @@ FUPool::dump()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FUPool::switchOut()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FUPool::takeOverFrom()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < numFU; i++) {
|
||||||
|
unitBusy[i] = false;
|
||||||
|
}
|
||||||
|
unitsToBeFreed.clear();
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -154,6 +154,9 @@ class FUPool : public SimObject
|
||||||
unsigned getIssueLatency(OpClass capability) {
|
unsigned getIssueLatency(OpClass capability) {
|
||||||
return maxIssueLatencies[capability];
|
return maxIssueLatencies[capability];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
void takeOverFrom();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_O3_FU_POOL_HH__
|
#endif // __CPU_O3_FU_POOL_HH__
|
||||||
|
|
|
@ -157,6 +157,12 @@ class DefaultIEW
|
||||||
/** Sets pointer to the scoreboard. */
|
/** Sets pointer to the scoreboard. */
|
||||||
void setScoreboard(Scoreboard *sb_ptr);
|
void setScoreboard(Scoreboard *sb_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Sets page table pointer within LSQ. */
|
/** Sets page table pointer within LSQ. */
|
||||||
// void setPageTable(PageTable *pt_ptr);
|
// void setPageTable(PageTable *pt_ptr);
|
||||||
|
|
||||||
|
@ -420,6 +426,8 @@ class DefaultIEW
|
||||||
/** Maximum size of the skid buffer. */
|
/** Maximum size of the skid buffer. */
|
||||||
unsigned skidBufferMax;
|
unsigned skidBufferMax;
|
||||||
|
|
||||||
|
bool switchedOut;
|
||||||
|
|
||||||
/** Stat for total number of idle cycles. */
|
/** Stat for total number of idle cycles. */
|
||||||
Stats::Scalar<> iewIdleCycles;
|
Stats::Scalar<> iewIdleCycles;
|
||||||
/** Stat for total number of squashing cycles. */
|
/** Stat for total number of squashing cycles. */
|
||||||
|
|
|
@ -55,13 +55,13 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
|
||||||
|
|
||||||
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
|
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
|
||||||
|
|
||||||
iewStage->wakeCPU();
|
if (inst->isSquashed() || iewStage->isSwitchedOut()) {
|
||||||
|
|
||||||
if (inst->isSquashed()) {
|
|
||||||
inst = NULL;
|
inst = NULL;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iewStage->wakeCPU();
|
||||||
|
|
||||||
if (!inst->isExecuted()) {
|
if (!inst->isExecuted()) {
|
||||||
inst->setExecuted();
|
inst->setExecuted();
|
||||||
|
|
||||||
|
@ -101,7 +101,8 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
|
||||||
issueReadWidth(params->issueWidth),
|
issueReadWidth(params->issueWidth),
|
||||||
issueWidth(params->issueWidth),
|
issueWidth(params->issueWidth),
|
||||||
executeWidth(params->executeWidth),
|
executeWidth(params->executeWidth),
|
||||||
numThreads(params->numberOfThreads)
|
numThreads(params->numberOfThreads),
|
||||||
|
switchedOut(false)
|
||||||
{
|
{
|
||||||
DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth);
|
DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth);
|
||||||
_status = Active;
|
_status = Active;
|
||||||
|
@ -436,6 +437,53 @@ DefaultIEW<Impl>::setPageTable(PageTable *pt_ptr)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultIEW<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
switchedOut = true;
|
||||||
|
instQueue.switchOut();
|
||||||
|
ldstQueue.switchOut();
|
||||||
|
fuPool->switchOut();
|
||||||
|
|
||||||
|
for (int i = 0; i < numThreads; i++) {
|
||||||
|
while (!insts[i].empty())
|
||||||
|
insts[i].pop();
|
||||||
|
while (!skidBuffer[i].empty())
|
||||||
|
skidBuffer[i].pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultIEW<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
_status = Active;
|
||||||
|
exeStatus = Running;
|
||||||
|
wbStatus = Idle;
|
||||||
|
switchedOut = false;
|
||||||
|
|
||||||
|
instQueue.takeOverFrom();
|
||||||
|
ldstQueue.takeOverFrom();
|
||||||
|
fuPool->takeOverFrom();
|
||||||
|
|
||||||
|
initStage();
|
||||||
|
cpu->activityThisCycle();
|
||||||
|
|
||||||
|
for (int i=0; i < numThreads; i++) {
|
||||||
|
dispatchStatus[i] = Running;
|
||||||
|
stalls[i].commit = false;
|
||||||
|
fetchRedirect[i] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
updateLSQNextCycle = false;
|
||||||
|
|
||||||
|
// @todo: Fix hardcoded number
|
||||||
|
for (int i = 0; i < 6; ++i) {
|
||||||
|
issueToExecQueue.advance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::squash(unsigned tid)
|
DefaultIEW<Impl>::squash(unsigned tid)
|
||||||
|
|
|
@ -112,6 +112,10 @@ class InstructionQueue
|
||||||
/** Registers statistics. */
|
/** Registers statistics. */
|
||||||
void regStats();
|
void regStats();
|
||||||
|
|
||||||
|
void resetState();
|
||||||
|
|
||||||
|
void resetDependencyGraph();
|
||||||
|
|
||||||
/** Sets CPU pointer. */
|
/** Sets CPU pointer. */
|
||||||
void setCPU(FullCPU *_cpu) { cpu = _cpu; }
|
void setCPU(FullCPU *_cpu) { cpu = _cpu; }
|
||||||
|
|
||||||
|
@ -127,6 +131,12 @@ class InstructionQueue
|
||||||
/** Sets the global time buffer. */
|
/** Sets the global time buffer. */
|
||||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Number of entries needed for given amount of threads. */
|
/** Number of entries needed for given amount of threads. */
|
||||||
int entryAmount(int num_threads);
|
int entryAmount(int num_threads);
|
||||||
|
|
||||||
|
@ -385,6 +395,8 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
unsigned commitToIEWDelay;
|
unsigned commitToIEWDelay;
|
||||||
|
|
||||||
|
bool switchedOut;
|
||||||
|
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
// Variables needed for squashing
|
// Variables needed for squashing
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
|
@ -507,7 +519,7 @@ class InstructionQueue
|
||||||
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
||||||
|
|
||||||
Stats::VectorDistribution<> queue_res_dist;
|
Stats::VectorDistribution<> queue_res_dist;
|
||||||
Stats::Vector<> n_issued_dist;
|
Stats::Distribution<> n_issued_dist;
|
||||||
Stats::VectorDistribution<> issue_delay_dist;
|
Stats::VectorDistribution<> issue_delay_dist;
|
||||||
|
|
||||||
Stats::Vector<> stat_fu_busy;
|
Stats::Vector<> stat_fu_busy;
|
||||||
|
|
|
@ -82,16 +82,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
|
||||||
{
|
{
|
||||||
assert(fuPool);
|
assert(fuPool);
|
||||||
|
|
||||||
|
switchedOut = false;
|
||||||
|
|
||||||
numThreads = params->numberOfThreads;
|
numThreads = params->numberOfThreads;
|
||||||
|
|
||||||
//Initialize thread IQ counts
|
|
||||||
for (int i = 0; i <numThreads; i++) {
|
|
||||||
count[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize the number of free IQ entries.
|
|
||||||
freeEntries = numEntries;
|
|
||||||
|
|
||||||
// Set the number of physical registers as the number of int + float
|
// Set the number of physical registers as the number of int + float
|
||||||
numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
|
numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
|
||||||
|
|
||||||
|
@ -101,6 +95,13 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
|
||||||
//dependency graph.
|
//dependency graph.
|
||||||
dependGraph = new DependencyEntry[numPhysRegs];
|
dependGraph = new DependencyEntry[numPhysRegs];
|
||||||
|
|
||||||
|
// Initialize all the head pointers to point to NULL, and all the
|
||||||
|
// entries as unready.
|
||||||
|
for (int i = 0; i < numPhysRegs; ++i) {
|
||||||
|
dependGraph[i].next = NULL;
|
||||||
|
dependGraph[i].inst = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// Resize the register scoreboard.
|
// Resize the register scoreboard.
|
||||||
regScoreboard.resize(numPhysRegs);
|
regScoreboard.resize(numPhysRegs);
|
||||||
|
|
||||||
|
@ -110,27 +111,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
|
||||||
memDepUnit[i].setIQ(this);
|
memDepUnit[i].setIQ(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize all the head pointers to point to NULL, and all the
|
resetState();
|
||||||
// entries as unready.
|
|
||||||
// Note that in actuality, the registers corresponding to the logical
|
|
||||||
// registers start off as ready. However this doesn't matter for the
|
|
||||||
// IQ as the instruction should have been correctly told if those
|
|
||||||
// registers are ready in rename. Thus it can all be initialized as
|
|
||||||
// unready.
|
|
||||||
for (int i = 0; i < numPhysRegs; ++i) {
|
|
||||||
dependGraph[i].next = NULL;
|
|
||||||
dependGraph[i].inst = NULL;
|
|
||||||
regScoreboard[i] = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < numThreads; ++i) {
|
|
||||||
squashedSeqNum[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < Num_OpClasses; ++i) {
|
|
||||||
queueOnList[i] = false;
|
|
||||||
readyIt[i] = listOrder.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
string policy = params->smtIQPolicy;
|
string policy = params->smtIQPolicy;
|
||||||
|
|
||||||
|
@ -184,30 +165,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
InstructionQueue<Impl>::~InstructionQueue()
|
InstructionQueue<Impl>::~InstructionQueue()
|
||||||
{
|
{
|
||||||
// Clear the dependency graph
|
resetDependencyGraph();
|
||||||
DependencyEntry *curr;
|
|
||||||
DependencyEntry *prev;
|
|
||||||
|
|
||||||
for (int i = 0; i < numPhysRegs; ++i) {
|
|
||||||
curr = dependGraph[i].next;
|
|
||||||
|
|
||||||
while (curr) {
|
|
||||||
DependencyEntry::mem_alloc_counter--;
|
|
||||||
|
|
||||||
prev = curr;
|
|
||||||
curr = prev->next;
|
|
||||||
prev->inst = NULL;
|
|
||||||
|
|
||||||
delete prev;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dependGraph[i].inst) {
|
|
||||||
dependGraph[i].inst = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
dependGraph[i].next = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(DependencyEntry::mem_alloc_counter == 0);
|
assert(DependencyEntry::mem_alloc_counter == 0);
|
||||||
|
|
||||||
delete [] dependGraph;
|
delete [] dependGraph;
|
||||||
|
@ -307,10 +265,10 @@ InstructionQueue<Impl>::regStats()
|
||||||
queue_res_dist.subname(i, opClassStrings[i]);
|
queue_res_dist.subname(i, opClassStrings[i]);
|
||||||
}
|
}
|
||||||
n_issued_dist
|
n_issued_dist
|
||||||
.init(totalWidth + 1)
|
.init(0,totalWidth,1)
|
||||||
.name(name() + ".ISSUE:issued_per_cycle")
|
.name(name() + ".ISSUE:issued_per_cycle")
|
||||||
.desc("Number of insts issued each cycle")
|
.desc("Number of insts issued each cycle")
|
||||||
.flags(total | pdf | dist)
|
.flags(pdf)
|
||||||
;
|
;
|
||||||
/*
|
/*
|
||||||
dist_unissued
|
dist_unissued
|
||||||
|
@ -400,6 +358,71 @@ InstructionQueue<Impl>::regStats()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::resetState()
|
||||||
|
{
|
||||||
|
//Initialize thread IQ counts
|
||||||
|
for (int i = 0; i <numThreads; i++) {
|
||||||
|
count[i] = 0;
|
||||||
|
instList[i].clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the number of free IQ entries.
|
||||||
|
freeEntries = numEntries;
|
||||||
|
|
||||||
|
// Note that in actuality, the registers corresponding to the logical
|
||||||
|
// registers start off as ready. However this doesn't matter for the
|
||||||
|
// IQ as the instruction should have been correctly told if those
|
||||||
|
// registers are ready in rename. Thus it can all be initialized as
|
||||||
|
// unready.
|
||||||
|
for (int i = 0; i < numPhysRegs; ++i) {
|
||||||
|
regScoreboard[i] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
|
squashedSeqNum[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||||
|
while (!readyInsts[i].empty())
|
||||||
|
readyInsts[i].pop();
|
||||||
|
queueOnList[i] = false;
|
||||||
|
readyIt[i] = listOrder.end();
|
||||||
|
}
|
||||||
|
nonSpecInsts.clear();
|
||||||
|
listOrder.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::resetDependencyGraph()
|
||||||
|
{
|
||||||
|
// Clear the dependency graph
|
||||||
|
DependencyEntry *curr;
|
||||||
|
DependencyEntry *prev;
|
||||||
|
|
||||||
|
for (int i = 0; i < numPhysRegs; ++i) {
|
||||||
|
curr = dependGraph[i].next;
|
||||||
|
|
||||||
|
while (curr) {
|
||||||
|
DependencyEntry::mem_alloc_counter--;
|
||||||
|
|
||||||
|
prev = curr;
|
||||||
|
curr = prev->next;
|
||||||
|
prev->inst = NULL;
|
||||||
|
|
||||||
|
delete prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dependGraph[i].inst) {
|
||||||
|
dependGraph[i].inst = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
dependGraph[i].next = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
|
InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
|
||||||
|
@ -426,6 +449,25 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
|
||||||
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
|
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
resetState();
|
||||||
|
resetDependencyGraph();
|
||||||
|
switchedOut = true;
|
||||||
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
|
memDepUnit[i].switchOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
InstructionQueue<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
switchedOut = false;
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
int
|
int
|
||||||
InstructionQueue<Impl>::entryAmount(int num_threads)
|
InstructionQueue<Impl>::entryAmount(int num_threads)
|
||||||
|
@ -685,6 +727,10 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
|
||||||
{
|
{
|
||||||
// The CPU could have been sleeping until this op completed (*extremely*
|
// The CPU could have been sleeping until this op completed (*extremely*
|
||||||
// long latency op). Wake it if it was. This may be overkill.
|
// long latency op). Wake it if it was. This may be overkill.
|
||||||
|
if (isSwitchedOut()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
iewStage->wakeCPU();
|
iewStage->wakeCPU();
|
||||||
|
|
||||||
fuPool->freeUnit(fu_idx);
|
fuPool->freeUnit(fu_idx);
|
||||||
|
@ -816,7 +862,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
FUCompletion *execution = new FUCompletion(issuing_inst,
|
FUCompletion *execution = new FUCompletion(issuing_inst,
|
||||||
idx, this);
|
idx, this);
|
||||||
|
|
||||||
execution->schedule(curTick + issue_latency - 1);
|
execution->schedule(curTick + cpu->cycles(issue_latency - 1));
|
||||||
} else {
|
} else {
|
||||||
i2e_info->insts[exec_queue_slot++] = issuing_inst;
|
i2e_info->insts[exec_queue_slot++] = issuing_inst;
|
||||||
i2e_info->size++;
|
i2e_info->size++;
|
||||||
|
@ -862,6 +908,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
n_issued_dist.sample(total_issued);
|
||||||
|
|
||||||
if (total_issued) {
|
if (total_issued) {
|
||||||
cpu->activityThisCycle();
|
cpu->activityThisCycle();
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -71,6 +71,9 @@ class LSQ {
|
||||||
/** Sets the page table pointer. */
|
/** Sets the page table pointer. */
|
||||||
// void setPageTable(PageTable *pt_ptr);
|
// void setPageTable(PageTable *pt_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Number of entries needed for the given amount of threads.*/
|
/** Number of entries needed for the given amount of threads.*/
|
||||||
int entryAmount(int num_threads);
|
int entryAmount(int num_threads);
|
||||||
void removeEntries(unsigned tid);
|
void removeEntries(unsigned tid);
|
||||||
|
@ -271,15 +274,6 @@ class LSQ {
|
||||||
/** Max SQ Size - Used to Enforce Sharing Policies. */
|
/** Max SQ Size - Used to Enforce Sharing Policies. */
|
||||||
unsigned maxSQEntries;
|
unsigned maxSQEntries;
|
||||||
|
|
||||||
/** Global Load Count. */
|
|
||||||
int loads;
|
|
||||||
|
|
||||||
/** Global Store Count */
|
|
||||||
int stores;
|
|
||||||
|
|
||||||
/** Global Store To WB Count */
|
|
||||||
int storesToWB;
|
|
||||||
|
|
||||||
/** Number of Threads. */
|
/** Number of Threads. */
|
||||||
unsigned numThreads;
|
unsigned numThreads;
|
||||||
};
|
};
|
||||||
|
|
|
@ -33,7 +33,6 @@ using namespace std;
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
LSQ<Impl>::LSQ(Params *params)
|
LSQ<Impl>::LSQ(Params *params)
|
||||||
: LQEntries(params->LQEntries), SQEntries(params->SQEntries),
|
: LQEntries(params->LQEntries), SQEntries(params->SQEntries),
|
||||||
loads(0), stores(0), storesToWB(0),
|
|
||||||
numThreads(params->numberOfThreads)
|
numThreads(params->numberOfThreads)
|
||||||
{
|
{
|
||||||
DPRINTF(LSQ, "Creating LSQ object.\n");
|
DPRINTF(LSQ, "Creating LSQ object.\n");
|
||||||
|
@ -143,6 +142,24 @@ LSQ<Impl>::setPageTable(PageTable *pt_ptr)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
LSQ<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
for (int tid = 0; tid < numThreads; tid++) {
|
||||||
|
thread[tid].switchOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
LSQ<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
for (int tid = 0; tid < numThreads; tid++) {
|
||||||
|
thread[tid].takeOverFrom();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
int
|
int
|
||||||
LSQ<Impl>::entryAmount(int num_threads)
|
LSQ<Impl>::entryAmount(int num_threads)
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
#include "cpu/inst_seq.hh"
|
#include "cpu/inst_seq.hh"
|
||||||
#include "mem/mem_interface.hh"
|
#include "mem/mem_interface.hh"
|
||||||
//#include "mem/page_table.hh"
|
//#include "mem/page_table.hh"
|
||||||
|
#include "sim/debug.hh"
|
||||||
#include "sim/sim_object.hh"
|
#include "sim/sim_object.hh"
|
||||||
#include "arch/faults.hh"
|
#include "arch/faults.hh"
|
||||||
|
|
||||||
|
@ -110,6 +111,12 @@ class LSQUnit {
|
||||||
/** Sets the page table pointer. */
|
/** Sets the page table pointer. */
|
||||||
// void setPageTable(PageTable *pt_ptr);
|
// void setPageTable(PageTable *pt_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Ticks the LSQ unit, which in this case only resets the number of
|
/** Ticks the LSQ unit, which in this case only resets the number of
|
||||||
* used cache ports.
|
* used cache ports.
|
||||||
* @todo: Move the number of used ports up to the LSQ level so it can
|
* @todo: Move the number of used ports up to the LSQ level so it can
|
||||||
|
@ -278,20 +285,20 @@ class LSQUnit {
|
||||||
/** Whether or not the store is completed. */
|
/** Whether or not the store is completed. */
|
||||||
bool completed;
|
bool completed;
|
||||||
};
|
};
|
||||||
|
/*
|
||||||
enum Status {
|
enum Status {
|
||||||
Running,
|
Running,
|
||||||
Idle,
|
Idle,
|
||||||
DcacheMissStall,
|
DcacheMissStall,
|
||||||
DcacheMissSwitch
|
DcacheMissSwitch
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
private:
|
private:
|
||||||
/** The LSQUnit thread id. */
|
/** The LSQUnit thread id. */
|
||||||
unsigned lsqID;
|
unsigned lsqID;
|
||||||
|
|
||||||
/** The status of the LSQ unit. */
|
/** The status of the LSQ unit. */
|
||||||
Status _status;
|
// Status _status;
|
||||||
|
|
||||||
/** The store queue. */
|
/** The store queue. */
|
||||||
std::vector<SQEntry> storeQueue;
|
std::vector<SQEntry> storeQueue;
|
||||||
|
@ -335,6 +342,8 @@ class LSQUnit {
|
||||||
/** The number of used cache ports in this cycle. */
|
/** The number of used cache ports in this cycle. */
|
||||||
int usedPorts;
|
int usedPorts;
|
||||||
|
|
||||||
|
bool switchedOut;
|
||||||
|
|
||||||
//list<InstSeqNum> mshrSeqNums;
|
//list<InstSeqNum> mshrSeqNums;
|
||||||
|
|
||||||
//Stats::Scalar<> dcacheStallCycles;
|
//Stats::Scalar<> dcacheStallCycles;
|
||||||
|
@ -373,7 +382,25 @@ class LSQUnit {
|
||||||
// Will also need how many read/write ports the Dcache has. Or keep track
|
// Will also need how many read/write ports the Dcache has. Or keep track
|
||||||
// of that in stage that is one level up, and only call executeLoad/Store
|
// of that in stage that is one level up, and only call executeLoad/Store
|
||||||
// the appropriate number of times.
|
// the appropriate number of times.
|
||||||
|
/*
|
||||||
|
// total number of loads forwaded from LSQ stores
|
||||||
|
Stats::Vector<> lsq_forw_loads;
|
||||||
|
|
||||||
|
// total number of loads ignored due to invalid addresses
|
||||||
|
Stats::Vector<> inv_addr_loads;
|
||||||
|
|
||||||
|
// total number of software prefetches ignored due to invalid addresses
|
||||||
|
Stats::Vector<> inv_addr_swpfs;
|
||||||
|
|
||||||
|
// total non-speculative bogus addresses seen (debug var)
|
||||||
|
Counter sim_invalid_addrs;
|
||||||
|
Stats::Vector<> fu_busy; //cumulative fu busy
|
||||||
|
|
||||||
|
// ready loads blocked due to memory disambiguation
|
||||||
|
Stats::Vector<> lsq_blocked_loads;
|
||||||
|
|
||||||
|
Stats::Scalar<> lsqInversion;
|
||||||
|
*/
|
||||||
public:
|
public:
|
||||||
/** Executes the load at the given index. */
|
/** Executes the load at the given index. */
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -590,7 +617,12 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
|
||||||
}
|
}
|
||||||
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
|
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
|
||||||
loadQueue[load_idx]->readPC());
|
loadQueue[load_idx]->readPC());
|
||||||
|
/*
|
||||||
|
Addr debug_addr = ULL(0xfffffc0000be81a8);
|
||||||
|
if (req->vaddr == debug_addr) {
|
||||||
|
debug_break();
|
||||||
|
}
|
||||||
|
*/
|
||||||
assert(!req->completionEvent);
|
assert(!req->completionEvent);
|
||||||
req->completionEvent =
|
req->completionEvent =
|
||||||
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
|
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
|
||||||
|
@ -608,7 +640,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
|
||||||
|
|
||||||
lastDcacheStall = curTick;
|
lastDcacheStall = curTick;
|
||||||
|
|
||||||
_status = DcacheMissStall;
|
// _status = DcacheMissStall;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
|
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
|
||||||
|
@ -694,7 +726,12 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
|
||||||
storeQueue[store_idx].req = req;
|
storeQueue[store_idx].req = req;
|
||||||
storeQueue[store_idx].size = sizeof(T);
|
storeQueue[store_idx].size = sizeof(T);
|
||||||
storeQueue[store_idx].data = data;
|
storeQueue[store_idx].data = data;
|
||||||
|
/*
|
||||||
|
Addr debug_addr = ULL(0xfffffc0000be81a8);
|
||||||
|
if (req->vaddr == debug_addr) {
|
||||||
|
debug_break();
|
||||||
|
}
|
||||||
|
*/
|
||||||
// This function only writes the data to the store queue, so no fault
|
// This function only writes the data to the store queue, so no fault
|
||||||
// can happen here.
|
// can happen here.
|
||||||
return NoFault;
|
return NoFault;
|
||||||
|
|
|
@ -50,6 +50,9 @@ LSQUnit<Impl>::StoreCompletionEvent::process()
|
||||||
|
|
||||||
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
|
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
|
||||||
|
|
||||||
|
if (lsqPtr->isSwitchedOut())
|
||||||
|
return;
|
||||||
|
|
||||||
lsqPtr->cpu->wakeCPU();
|
lsqPtr->cpu->wakeCPU();
|
||||||
if (wbEvent)
|
if (wbEvent)
|
||||||
wbEvent->process();
|
wbEvent->process();
|
||||||
|
@ -78,6 +81,8 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
|
||||||
{
|
{
|
||||||
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
|
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
|
||||||
|
|
||||||
|
switchedOut = false;
|
||||||
|
|
||||||
lsqID = id;
|
lsqID = id;
|
||||||
|
|
||||||
LQEntries = maxLQEntries;
|
LQEntries = maxLQEntries;
|
||||||
|
@ -138,6 +143,89 @@ LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void
|
||||||
|
LSQUnit<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
switchedOut = true;
|
||||||
|
for (int i = 0; i < loadQueue.size(); ++i)
|
||||||
|
loadQueue[i] = NULL;
|
||||||
|
|
||||||
|
while (storesToWB > 0 &&
|
||||||
|
storeWBIdx != storeTail &&
|
||||||
|
storeQueue[storeWBIdx].inst &&
|
||||||
|
storeQueue[storeWBIdx].canWB) {
|
||||||
|
|
||||||
|
if (storeQueue[storeWBIdx].size == 0 ||
|
||||||
|
storeQueue[storeWBIdx].inst->isDataPrefetch() ||
|
||||||
|
storeQueue[storeWBIdx].committed ||
|
||||||
|
storeQueue[storeWBIdx].req->flags & LOCKED) {
|
||||||
|
incrStIdx(storeWBIdx);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(storeQueue[storeWBIdx].req);
|
||||||
|
assert(!storeQueue[storeWBIdx].committed);
|
||||||
|
|
||||||
|
MemReqPtr req = storeQueue[storeWBIdx].req;
|
||||||
|
storeQueue[storeWBIdx].committed = true;
|
||||||
|
|
||||||
|
req->cmd = Write;
|
||||||
|
req->completionEvent = NULL;
|
||||||
|
req->time = curTick;
|
||||||
|
assert(!req->data);
|
||||||
|
req->data = new uint8_t[64];
|
||||||
|
memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
|
||||||
|
|
||||||
|
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
|
||||||
|
"to Addr:%#x, data:%#x [sn:%lli]\n",
|
||||||
|
storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
|
||||||
|
req->paddr, *(req->data),
|
||||||
|
storeQueue[storeWBIdx].inst->seqNum);
|
||||||
|
|
||||||
|
switch(storeQueue[storeWBIdx].size) {
|
||||||
|
case 1:
|
||||||
|
cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
panic("Unexpected store size!\n");
|
||||||
|
}
|
||||||
|
incrStIdx(storeWBIdx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void
|
||||||
|
LSQUnit<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
switchedOut = false;
|
||||||
|
loads = stores = storesToWB = 0;
|
||||||
|
|
||||||
|
loadHead = loadTail = 0;
|
||||||
|
|
||||||
|
storeHead = storeWBIdx = storeTail = 0;
|
||||||
|
|
||||||
|
usedPorts = 0;
|
||||||
|
|
||||||
|
loadFaultInst = storeFaultInst = memDepViolator = NULL;
|
||||||
|
|
||||||
|
blockedLoadSeqNum = 0;
|
||||||
|
|
||||||
|
stalled = false;
|
||||||
|
isLoadBlocked = false;
|
||||||
|
loadBlockedHandled = false;
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void
|
void
|
||||||
LSQUnit<Impl>::resizeLQ(unsigned size)
|
LSQUnit<Impl>::resizeLQ(unsigned size)
|
||||||
|
@ -647,7 +735,7 @@ LSQUnit<Impl>::writebackStores()
|
||||||
|
|
||||||
lastDcacheStall = curTick;
|
lastDcacheStall = curTick;
|
||||||
|
|
||||||
_status = DcacheMissStall;
|
// _status = DcacheMissStall;
|
||||||
|
|
||||||
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
|
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
|
||||||
|
|
||||||
|
|
|
@ -84,6 +84,10 @@ class MemDepUnit {
|
||||||
/** Registers statistics. */
|
/** Registers statistics. */
|
||||||
void regStats();
|
void regStats();
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Sets the pointer to the IQ. */
|
/** Sets the pointer to the IQ. */
|
||||||
void setIQ(InstructionQueue<Impl> *iq_ptr);
|
void setIQ(InstructionQueue<Impl> *iq_ptr);
|
||||||
|
|
||||||
|
|
|
@ -101,6 +101,26 @@ MemDepUnit<MemDepPred, Impl>::regStats()
|
||||||
.desc("Number of conflicting stores.");
|
.desc("Number of conflicting stores.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class MemDepPred, class Impl>
|
||||||
|
void
|
||||||
|
MemDepUnit<MemDepPred, Impl>::switchOut()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
|
instList[i].clear();
|
||||||
|
}
|
||||||
|
instsToReplay.clear();
|
||||||
|
memDepHash.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class MemDepPred, class Impl>
|
||||||
|
void
|
||||||
|
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
loadBarrier = storeBarrier = false;
|
||||||
|
loadBarrierSN = storeBarrierSN = 0;
|
||||||
|
depPred.clear();
|
||||||
|
}
|
||||||
|
|
||||||
template <class MemDepPred, class Impl>
|
template <class MemDepPred, class Impl>
|
||||||
void
|
void
|
||||||
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
|
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
|
||||||
|
|
|
@ -41,6 +41,15 @@ ReturnAddrStack::init(unsigned _numEntries)
|
||||||
addrStack[i] = 0;
|
addrStack[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ReturnAddrStack::reset()
|
||||||
|
{
|
||||||
|
usedEntries = 0;
|
||||||
|
tos = 0;
|
||||||
|
for (int i = 0; i < numEntries; ++i)
|
||||||
|
addrStack[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ReturnAddrStack::push(const Addr &return_addr)
|
ReturnAddrStack::push(const Addr &return_addr)
|
||||||
{
|
{
|
||||||
|
|
|
@ -47,6 +47,8 @@ class ReturnAddrStack
|
||||||
*/
|
*/
|
||||||
void init(unsigned numEntries);
|
void init(unsigned numEntries);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
|
||||||
/** Returns the top address on the RAS. */
|
/** Returns the top address on the RAS. */
|
||||||
Addr top()
|
Addr top()
|
||||||
{ return addrStack[tos]; }
|
{ return addrStack[tos]; }
|
||||||
|
|
|
@ -153,6 +153,10 @@ class DefaultRename
|
||||||
/** Sets pointer to the scoreboard. */
|
/** Sets pointer to the scoreboard. */
|
||||||
void setScoreboard(Scoreboard *_scoreboard);
|
void setScoreboard(Scoreboard *_scoreboard);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Squashes all instructions in a thread. */
|
/** Squashes all instructions in a thread. */
|
||||||
void squash(unsigned tid);
|
void squash(unsigned tid);
|
||||||
|
|
||||||
|
@ -448,6 +452,7 @@ class DefaultRename
|
||||||
Stats::Scalar<> renameUndoneMaps;
|
Stats::Scalar<> renameUndoneMaps;
|
||||||
Stats::Scalar<> renamedSerializing;
|
Stats::Scalar<> renamedSerializing;
|
||||||
Stats::Scalar<> renamedTempSerializing;
|
Stats::Scalar<> renamedTempSerializing;
|
||||||
|
Stats::Scalar<> renameSkidInsts;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_O3_RENAME_HH__
|
#endif // __CPU_O3_RENAME_HH__
|
||||||
|
|
|
@ -151,6 +151,11 @@ DefaultRename<Impl>::regStats()
|
||||||
.desc("count of temporary serializing insts renamed")
|
.desc("count of temporary serializing insts renamed")
|
||||||
.flags(Stats::total)
|
.flags(Stats::total)
|
||||||
;
|
;
|
||||||
|
renameSkidInsts
|
||||||
|
.name(name() + ".RENAME:skidInsts")
|
||||||
|
.desc("count of insts added to the skid buffer")
|
||||||
|
.flags(Stats::total)
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -213,8 +218,8 @@ DefaultRename<Impl>::initStage()
|
||||||
|
|
||||||
// Clear these pointers so they are not accidentally used in
|
// Clear these pointers so they are not accidentally used in
|
||||||
// non-initialization code.
|
// non-initialization code.
|
||||||
iew_ptr = NULL;
|
// iew_ptr = NULL;
|
||||||
commit_ptr = NULL;
|
// commit_ptr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
@ -253,6 +258,55 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
|
||||||
scoreboard = _scoreboard;
|
scoreboard = _scoreboard;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultRename<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < numThreads; i++) {
|
||||||
|
typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
|
||||||
|
|
||||||
|
while (!historyBuffer[i].empty()) {
|
||||||
|
assert(hb_it != historyBuffer[i].end());
|
||||||
|
|
||||||
|
DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
|
||||||
|
"number %i.\n", i, (*hb_it).instSeqNum);
|
||||||
|
|
||||||
|
// Tell the rename map to set the architected register to the
|
||||||
|
// previous physical register that it was renamed to.
|
||||||
|
renameMap[i]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
|
||||||
|
|
||||||
|
// Put the renamed physical register back on the free list.
|
||||||
|
freeList->addReg(hb_it->newPhysReg);
|
||||||
|
|
||||||
|
historyBuffer[i].erase(hb_it++);
|
||||||
|
}
|
||||||
|
insts[i].clear();
|
||||||
|
skidBuffer[i].clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultRename<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
_status = Inactive;
|
||||||
|
initStage();
|
||||||
|
|
||||||
|
for (int i=0; i< numThreads; i++) {
|
||||||
|
renameStatus[i] = Idle;
|
||||||
|
|
||||||
|
stalls[i].iew = false;
|
||||||
|
stalls[i].commit = false;
|
||||||
|
serializeInst[i] = NULL;
|
||||||
|
|
||||||
|
instsInProgress[i] = 0;
|
||||||
|
|
||||||
|
emptyROB[i] = true;
|
||||||
|
|
||||||
|
serializeOnNextInst[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultRename<Impl>::squash(unsigned tid)
|
DefaultRename<Impl>::squash(unsigned tid)
|
||||||
|
@ -393,7 +447,7 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
|
||||||
} else if (renameStatus[tid] == Unblocking) {
|
} else if (renameStatus[tid] == Unblocking) {
|
||||||
renameInsts(tid);
|
renameInsts(tid);
|
||||||
|
|
||||||
++renameUnblockCycles;
|
// ++renameUnblockCycles;
|
||||||
|
|
||||||
if (validInsts()) {
|
if (validInsts()) {
|
||||||
// Add the current inputs to the skid buffer so they can be
|
// Add the current inputs to the skid buffer so they can be
|
||||||
|
@ -564,6 +618,8 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
|
||||||
} else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
|
} else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
|
||||||
DPRINTF(Rename, "Serialize after instruction encountered.\n");
|
DPRINTF(Rename, "Serialize after instruction encountered.\n");
|
||||||
|
|
||||||
|
renamedSerializing++;
|
||||||
|
|
||||||
inst->setSerializeHandled();
|
inst->setSerializeHandled();
|
||||||
|
|
||||||
serializeAfter(insts_to_rename, tid);
|
serializeAfter(insts_to_rename, tid);
|
||||||
|
@ -594,13 +650,12 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
|
||||||
// Increment which instruction we're on.
|
// Increment which instruction we're on.
|
||||||
++toIEWIndex;
|
++toIEWIndex;
|
||||||
|
|
||||||
++renameRenamedInsts;
|
|
||||||
|
|
||||||
// Decrement how many instructions are available.
|
// Decrement how many instructions are available.
|
||||||
--insts_available;
|
--insts_available;
|
||||||
}
|
}
|
||||||
|
|
||||||
instsInProgress[tid] += renamed_insts;
|
instsInProgress[tid] += renamed_insts;
|
||||||
|
renameRenamedInsts += renamed_insts;
|
||||||
|
|
||||||
// If we wrote to the time buffer, record this.
|
// If we wrote to the time buffer, record this.
|
||||||
if (toIEWIndex) {
|
if (toIEWIndex) {
|
||||||
|
@ -635,6 +690,8 @@ DefaultRename<Impl>::skidInsert(unsigned tid)
|
||||||
DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
|
DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
|
||||||
"skidBuffer\n", tid, inst->seqNum, inst->readPC());
|
"skidBuffer\n", tid, inst->seqNum, inst->readPC());
|
||||||
|
|
||||||
|
++renameSkidInsts;
|
||||||
|
|
||||||
skidBuffer[tid].push_back(inst);
|
skidBuffer[tid].push_back(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,10 @@ class ROB
|
||||||
*/
|
*/
|
||||||
void setActiveThreads(std::list<unsigned>* at_ptr);
|
void setActiveThreads(std::list<unsigned>* at_ptr);
|
||||||
|
|
||||||
|
void switchOut();
|
||||||
|
|
||||||
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Function to insert an instruction into the ROB. Note that whatever
|
/** Function to insert an instruction into the ROB. Note that whatever
|
||||||
* calls this function must ensure that there is enough space within the
|
* calls this function must ensure that there is enough space within the
|
||||||
* ROB for the new instruction.
|
* ROB for the new instruction.
|
||||||
|
|
|
@ -121,6 +121,31 @@ ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr)
|
||||||
activeThreads = at_ptr;
|
activeThreads = at_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
ROB<Impl>::switchOut()
|
||||||
|
{
|
||||||
|
for (int tid = 0; tid < numThreads; tid++) {
|
||||||
|
instList[tid].clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
ROB<Impl>::takeOverFrom()
|
||||||
|
{
|
||||||
|
for (int tid=0; tid < numThreads; tid++) {
|
||||||
|
doneSquashing[tid] = true;
|
||||||
|
threadEntries[tid] = 0;
|
||||||
|
squashIt[tid] = instList[tid].end();
|
||||||
|
}
|
||||||
|
numInstsInROB = 0;
|
||||||
|
|
||||||
|
// Initialize the "universal" ROB head & tail point to invalid
|
||||||
|
// pointers
|
||||||
|
head = instList[0].end();
|
||||||
|
tail = instList[0].end();
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
|
|
|
@ -30,17 +30,17 @@
|
||||||
#include "cpu/o3/sat_counter.hh"
|
#include "cpu/o3/sat_counter.hh"
|
||||||
|
|
||||||
/**
 * Default constructor: builds a counter whose initial value, maximum
 * value and current value are all 0.
 *
 * maxVal is initialized explicitly so it never holds an indeterminate
 * value; setBits() assigns the real maximum later. Until then
 * increment() is a no-op, because it only counts while
 * counter < maxVal.
 */
SatCounter::SatCounter()
    : initialVal(0), maxVal(0), counter(0)
{
}
|
||||||
|
|
||||||
SatCounter::SatCounter(unsigned bits)
|
SatCounter::SatCounter(unsigned bits)
|
||||||
: maxVal((1 << bits) - 1), counter(0)
|
: initialVal(0), maxVal((1 << bits) - 1), counter(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
SatCounter::SatCounter(unsigned bits, unsigned initial_val)
|
SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
|
||||||
: maxVal((1 << bits) - 1), counter(initial_val)
|
: initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val)
|
||||||
{
|
{
|
||||||
// Check to make sure initial value doesn't exceed the max counter value.
|
// Check to make sure initial value doesn't exceed the max counter value.
|
||||||
if (initial_val > maxVal) {
|
if (initial_val > maxVal) {
|
||||||
|
@ -53,19 +53,3 @@ SatCounter::setBits(unsigned bits)
|
||||||
{
|
{
|
||||||
maxVal = (1 << bits) - 1;
|
maxVal = (1 << bits) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
SatCounter::increment()
|
|
||||||
{
|
|
||||||
if (counter < maxVal) {
|
|
||||||
++counter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
SatCounter::decrement()
|
|
||||||
{
|
|
||||||
if (counter > 0) {
|
|
||||||
--counter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -57,22 +57,34 @@ class SatCounter
|
||||||
* @param bits How many bits the counter will have.
|
* @param bits How many bits the counter will have.
|
||||||
* @param initial_val Starting value for each counter.
|
* @param initial_val Starting value for each counter.
|
||||||
*/
|
*/
|
||||||
SatCounter(unsigned bits, unsigned initial_val);
|
SatCounter(unsigned bits, uint8_t initial_val);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the number of bits.
|
* Sets the number of bits.
|
||||||
*/
|
*/
|
||||||
void setBits(unsigned bits);
|
void setBits(unsigned bits);
|
||||||
|
|
||||||
|
void reset() { counter = initialVal; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increments the counter's current value.
|
* Increments the counter's current value.
|
||||||
*/
|
*/
|
||||||
void increment();
|
void increment()
|
||||||
|
{
|
||||||
|
if (counter < maxVal) {
|
||||||
|
++counter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decrements the counter's current value.
|
* Decrements the counter's current value.
|
||||||
*/
|
*/
|
||||||
void decrement();
|
void decrement()
|
||||||
|
{
|
||||||
|
if (counter > 0) {
|
||||||
|
--counter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read the counter's value.
|
* Read the counter's value.
|
||||||
|
@ -81,6 +93,7 @@ class SatCounter
|
||||||
{ return counter; }
|
{ return counter; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
uint8_t initialVal;
|
||||||
uint8_t maxVal;
|
uint8_t maxVal;
|
||||||
uint8_t counter;
|
uint8_t counter;
|
||||||
};
|
};
|
||||||
|
|
|
@ -60,7 +60,7 @@ struct O3ThreadState : public ThreadState {
|
||||||
{ }
|
{ }
|
||||||
#else
|
#else
|
||||||
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
|
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
|
||||||
: ThreadState(-1, _thread_num, NULL, _process, _asid),
|
: ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
|
||||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue