Switch out fixups for the CPUs.
cpu/cpu_exec_context.cc: Be sure to switch over the kernel stats so things don't get messed up. This may lead to weird stats files for sampling runs (detailed stats should be correct, regardless of which kernel stats this is defined on). cpu/o3/cpu.cc: Updates for switching out. Also include a bunch of debug info if needed. cpu/o3/fetch_impl.hh: Switch out properly. cpu/o3/inst_queue.hh: cpu/o3/inst_queue_impl.hh: Comment out unused stats (they made the stats file huge). cpu/o3/lsq_unit.hh: cpu/o3/lsq_unit_impl.hh: Add in new stat. cpu/o3/rename.hh: Fix up for switching out. cpu/o3/rename_impl.hh: Fix up for switching out. Be sure to mark any Misc regs as ready if their renamed inst got squashed from being switched out. cpu/ozone/cpu_impl.hh: cpu/simple/cpu.cc: Switch out fixup. sim/eventq.hh: Make CPU switching more immediate. Also comment out the assertion, as it doesn't apply if we're putting it on an inst-based queue. --HG-- extra : convert_revision : f40ed40604738993f061e0c628810ff37a920562
This commit is contained in:
parent
5da3f70560
commit
74e8abd37e
|
@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext)
|
|||
if (quiesceEvent) {
|
||||
quiesceEvent->xc = proxy;
|
||||
}
|
||||
|
||||
Kernel::Statistics *stats = oldContext->getKernelStats();
|
||||
if (stats) {
|
||||
kernelStats = stats;
|
||||
}
|
||||
#endif
|
||||
|
||||
storeCondFailures = 0;
|
||||
|
|
|
@ -599,8 +599,11 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
|
|||
// Be sure to signal that there's some activity so the CPU doesn't
|
||||
// deschedule itself.
|
||||
activityRec.activity();
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled())
|
||||
thread[tid]->quiesceEvent->deschedule();
|
||||
#endif
|
||||
|
||||
fetch.wakeFromQuiesce();
|
||||
|
||||
|
@ -671,6 +674,8 @@ template <class Impl>
|
|||
void
|
||||
FullO3CPU<Impl>::switchOut(Sampler *_sampler)
|
||||
{
|
||||
DPRINTF(FullCPU, "Switching out\n");
|
||||
BaseCPU::switchOut(_sampler);
|
||||
sampler = _sampler;
|
||||
switchCount = 0;
|
||||
fetch.switchOut();
|
||||
|
@ -694,6 +699,41 @@ FullO3CPU<Impl>::signalSwitched()
|
|||
rename.doSwitchOut();
|
||||
commit.doSwitchOut();
|
||||
instList.clear();
|
||||
|
||||
#ifndef NDEBUG
|
||||
PhysRegIndex renamed_reg;
|
||||
// First loop through the integer registers.
|
||||
for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
|
||||
renamed_reg = renameMap[0].lookup(i);
|
||||
assert(renamed_reg == commitRenameMap[0].lookup(i));
|
||||
|
||||
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
|
||||
renamed_reg);
|
||||
|
||||
assert(scoreboard.getReg(renamed_reg));
|
||||
}
|
||||
|
||||
// Then loop through the floating point registers.
|
||||
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
|
||||
renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag);
|
||||
assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag));
|
||||
|
||||
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
|
||||
renamed_reg);
|
||||
|
||||
assert(scoreboard.getReg(renamed_reg));
|
||||
}
|
||||
|
||||
for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) {
|
||||
renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs;
|
||||
|
||||
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
|
||||
renamed_reg);
|
||||
|
||||
assert(scoreboard.getReg(renamed_reg));
|
||||
}
|
||||
#endif
|
||||
|
||||
while (!removeList.empty()) {
|
||||
removeList.pop();
|
||||
}
|
||||
|
|
|
@ -391,6 +391,7 @@ DefaultFetch<Impl>::takeOverFrom()
|
|||
wroteToTimeBuffer = false;
|
||||
_status = Inactive;
|
||||
switchedOut = false;
|
||||
interruptPending = false;
|
||||
branchPred.takeOverFrom();
|
||||
}
|
||||
|
||||
|
@ -469,7 +470,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
|||
unsigned flags = 0;
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
if (interruptPending && flags == 0) {
|
||||
if (isSwitchedOut() || (interruptPending && flags == 0)) {
|
||||
// Hold off fetch from getting new instructions while an interrupt
|
||||
// is pending.
|
||||
return false;
|
||||
|
|
|
@ -474,11 +474,11 @@ class InstructionQueue
|
|||
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
||||
|
||||
/** Distribution of number of instructions in the queue. */
|
||||
Stats::VectorDistribution<> queueResDist;
|
||||
// Stats::VectorDistribution<> queueResDist;
|
||||
/** Distribution of the number of instructions issued. */
|
||||
Stats::Distribution<> numIssuedDist;
|
||||
/** Distribution of the cycles it takes to issue an instruction. */
|
||||
Stats::VectorDistribution<> issueDelayDist;
|
||||
// Stats::VectorDistribution<> issueDelayDist;
|
||||
|
||||
/** Number of times an instruction could not be issued because a
|
||||
* FU was busy.
|
||||
|
|
|
@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
|
|||
.name(name() + ".iqSquashedNonSpecRemoved")
|
||||
.desc("Number of squashed non-spec instructions that were removed")
|
||||
.prereq(iqSquashedNonSpecRemoved);
|
||||
|
||||
/*
|
||||
queueResDist
|
||||
.init(Num_OpClasses, 0, 99, 2)
|
||||
.name(name() + ".IQ:residence:")
|
||||
|
@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
|
|||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
queueResDist.subname(i, opClassStrings[i]);
|
||||
}
|
||||
*/
|
||||
numIssuedDist
|
||||
.init(0,totalWidth,1)
|
||||
.name(name() + ".ISSUE:issued_per_cycle")
|
||||
|
@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
|
|||
//
|
||||
// How long did instructions for a particular FU type wait prior to issue
|
||||
//
|
||||
|
||||
/*
|
||||
issueDelayDist
|
||||
.init(Num_OpClasses,0,99,2)
|
||||
.name(name() + ".ISSUE:")
|
||||
|
@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
|
|||
subname << opClassStrings[i] << "_delay";
|
||||
issueDelayDist.subname(i, subname.str());
|
||||
}
|
||||
|
||||
*/
|
||||
issueRate
|
||||
.name(name() + ".ISSUE:rate")
|
||||
.desc("Inst issue rate")
|
||||
|
|
|
@ -382,6 +382,9 @@ class LSQUnit {
|
|||
* ignored due to the instruction already being squashed. */
|
||||
Stats::Scalar<> lsqIgnoredResponses;
|
||||
|
||||
/** Total number of memory ordering violations. */
|
||||
Stats::Scalar<> lsqMemOrderViolation;
|
||||
|
||||
/** Total number of squashed stores. */
|
||||
Stats::Scalar<> lsqSquashedStores;
|
||||
|
||||
|
|
|
@ -144,6 +144,10 @@ LSQUnit<Impl>::regStats()
|
|||
.name(name() + ".ignoredResponses")
|
||||
.desc("Number of memory responses ignored because the instruction is squashed");
|
||||
|
||||
lsqMemOrderViolation
|
||||
.name(name() + ".memOrderViolation")
|
||||
.desc("Number of memory ordering violations");
|
||||
|
||||
lsqSquashedStores
|
||||
.name(name() + ".squashedStores")
|
||||
.desc("Number of stores squashed");
|
||||
|
@ -495,6 +499,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
|
|||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
memDepViolator = loadQueue[load_idx];
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return genMachineCheckFault();
|
||||
}
|
||||
|
|
|
@ -411,6 +411,8 @@ class DefaultRename
|
|||
/** The maximum skid buffer size. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
PhysRegIndex maxPhysicalRegs;
|
||||
|
||||
/** Enum to record the source of a structure full stall. Can come from
|
||||
* either the ROB, IQ, or LSQ, and it is prioritized in that order.
|
||||
*/
|
||||
|
|
|
@ -40,7 +40,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
|
|||
commitToRenameDelay(params->commitToRenameDelay),
|
||||
renameWidth(params->renameWidth),
|
||||
commitWidth(params->commitWidth),
|
||||
numThreads(params->numberOfThreads)
|
||||
numThreads(params->numberOfThreads),
|
||||
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
|
@ -283,6 +284,11 @@ DefaultRename<Impl>::doSwitchOut()
|
|||
// Put the renamed physical register back on the free list.
|
||||
freeList->addReg(hb_it->newPhysReg);
|
||||
|
||||
// Be sure to mark its register as ready if it's a misc register.
|
||||
if (hb_it->newPhysReg >= maxPhysicalRegs) {
|
||||
scoreboard->setReg(hb_it->newPhysReg);
|
||||
}
|
||||
|
||||
historyBuffer[i].erase(hb_it++);
|
||||
}
|
||||
insts[i].clear();
|
||||
|
|
|
@ -184,7 +184,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
|||
|
||||
globalSeqNum = 1;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
checkInterrupts = false;
|
||||
#endif
|
||||
|
||||
lockFlag = 0;
|
||||
|
||||
|
@ -213,6 +215,7 @@ template <class Impl>
|
|||
void
|
||||
OzoneCPU<Impl>::switchOut(Sampler *_sampler)
|
||||
{
|
||||
BaseCPU::switchOut(_sampler);
|
||||
sampler = _sampler;
|
||||
switchCount = 0;
|
||||
// Front end needs state from back end, so switch out the back end first.
|
||||
|
@ -234,6 +237,16 @@ OzoneCPU<Impl>::signalSwitched()
|
|||
checker->switchOut(sampler);
|
||||
|
||||
_status = SwitchedOut;
|
||||
#ifndef NDEBUG
|
||||
// Loop through all registers
|
||||
for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
|
||||
assert(thread.renameTable[i] == frontEnd->renameTable[i]);
|
||||
|
||||
assert(thread.renameTable[i] == backEnd->renameTable[i]);
|
||||
|
||||
DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (tickEvent.scheduled())
|
||||
tickEvent.squash();
|
||||
|
@ -256,9 +269,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
|||
frontEnd->takeOverFrom();
|
||||
assert(!tickEvent.scheduled());
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Check rename table.
|
||||
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
|
||||
assert(thread.renameTable[i]->isResultReady());
|
||||
}
|
||||
#endif
|
||||
|
||||
// @todo: Fix hardcoded number
|
||||
// Clear out any old information in time buffer.
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
for (int i = 0; i < 15; ++i) {
|
||||
comm.advance();
|
||||
}
|
||||
|
||||
|
@ -291,8 +311,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
|
|||
scheduleTickEvent(delay);
|
||||
_status = Running;
|
||||
thread._status = ExecContext::Active;
|
||||
#if FULL_SYSTEM
|
||||
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
|
||||
thread.quiesceEvent->deschedule();
|
||||
#endif
|
||||
frontEnd->wakeFromQuiesce();
|
||||
}
|
||||
|
||||
|
@ -369,7 +391,7 @@ template <class Impl>
|
|||
void
|
||||
OzoneCPU<Impl>::resetStats()
|
||||
{
|
||||
startNumInst = numInst;
|
||||
// startNumInst = numInst;
|
||||
notIdleFraction = (_status != Idle);
|
||||
}
|
||||
|
||||
|
@ -777,7 +799,9 @@ OzoneCPU<Impl>::OzoneXC::halt()
|
|||
template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
|
||||
{ }
|
||||
{
|
||||
thread->dumpFuncProfile();
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class Impl>
|
||||
|
@ -797,6 +821,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
|
|||
copyArchRegs(old_context);
|
||||
setCpuId(old_context->readCpuId());
|
||||
|
||||
thread->inst = old_context->getInst();
|
||||
#if !FULL_SYSTEM
|
||||
setFuncExeInst(old_context->readFuncExeInst());
|
||||
#else
|
||||
|
@ -869,16 +894,14 @@ template <class Impl>
|
|||
void
|
||||
OzoneCPU<Impl>::OzoneXC::profileClear()
|
||||
{
|
||||
if (thread->profile)
|
||||
thread->profile->clear();
|
||||
thread->profileClear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneXC::profileSample()
|
||||
{
|
||||
if (thread->profile)
|
||||
thread->profile->sample(thread->profileNode, thread->profilePC);
|
||||
thread->profileSample();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -906,14 +929,20 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
|
|||
cpu->frontEnd->setPC(thread->PC);
|
||||
cpu->frontEnd->setNextPC(thread->nextPC);
|
||||
|
||||
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
|
||||
if (i < TheISA::FP_Base_DepTag) {
|
||||
thread->renameTable[i]->setIntResult(xc->readIntReg(i));
|
||||
} else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
|
||||
int fp_idx = i - TheISA::FP_Base_DepTag;
|
||||
thread->renameTable[i]->setDoubleResult(
|
||||
xc->readFloatRegDouble(fp_idx));
|
||||
}
|
||||
// First loop through the integer registers.
|
||||
for (int i = 0; i < TheISA::NumIntRegs; ++i) {
|
||||
/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
|
||||
"now has data %lli.\n",
|
||||
i, thread->renameTable[i]->readIntResult(),
|
||||
xc->readIntReg(i));
|
||||
*/
|
||||
thread->renameTable[i]->setIntResult(xc->readIntReg(i));
|
||||
}
|
||||
|
||||
// Then loop through the floating point registers.
|
||||
for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
|
||||
int fp_idx = i + TheISA::FP_Base_DepTag;
|
||||
thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i));
|
||||
}
|
||||
|
||||
#if !FULL_SYSTEM
|
||||
|
|
|
@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s)
|
|||
_status = SwitchedOut;
|
||||
|
||||
if (tickEvent.scheduled())
|
||||
tickEvent.squash();
|
||||
tickEvent.deschedule();
|
||||
|
||||
assert(!tickEvent.scheduled());
|
||||
|
||||
sampler->signalSwitched();
|
||||
}
|
||||
|
@ -294,7 +296,7 @@ SimpleCPU::regStats()
|
|||
void
|
||||
SimpleCPU::resetStats()
|
||||
{
|
||||
startNumInst = numInst;
|
||||
// startNumInst = numInst;
|
||||
notIdleFraction = (_status != Idle);
|
||||
}
|
||||
|
||||
|
@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src)
|
|||
Fault fault = cpuXC->translateDataReadReq(memReq);
|
||||
|
||||
if (fault == NoFault) {
|
||||
panic("We can't copy!");
|
||||
cpuXC->copySrcAddr = src;
|
||||
cpuXC->copySrcPhysAddr = memReq->paddr + offset;
|
||||
} else {
|
||||
|
@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr)
|
|||
void
|
||||
SimpleCPU::processCacheCompletion()
|
||||
{
|
||||
Fault fault;
|
||||
|
||||
switch (status()) {
|
||||
case IcacheMissStall:
|
||||
icacheStallCycles += curTick - lastIcacheStall;
|
||||
|
@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion()
|
|||
break;
|
||||
case DcacheMissSwitch:
|
||||
if (memReq->cmd.isRead()) {
|
||||
curStaticInst->execute(this,traceData);
|
||||
fault = curStaticInst->execute(this,traceData);
|
||||
if (traceData)
|
||||
traceData->finalize();
|
||||
} else {
|
||||
fault = NoFault;
|
||||
}
|
||||
assert(fault == NoFault);
|
||||
assert(!tickEvent.scheduled());
|
||||
_status = SwitchedOut;
|
||||
sampler->signalSwitched();
|
||||
return;
|
||||
case SwitchedOut:
|
||||
// If this CPU has been switched out due to sampling/warm-up,
|
||||
// ignore any further status changes (e.g., due to cache
|
||||
|
@ -787,9 +797,10 @@ SimpleCPU::tick()
|
|||
}
|
||||
|
||||
if (cpuXC->profile) {
|
||||
bool usermode =
|
||||
(cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
|
||||
cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
|
||||
// bool usermode =
|
||||
// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
|
||||
// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
|
||||
cpuXC->profilePC = cpuXC->readPC();
|
||||
ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
|
||||
if (node)
|
||||
cpuXC->profileNode = node;
|
||||
|
@ -849,8 +860,10 @@ SimpleCPU::tick()
|
|||
status() == Idle ||
|
||||
status() == DcacheMissStall);
|
||||
|
||||
if (status() == Running && !tickEvent.scheduled())
|
||||
if (status() == Running && !tickEvent.scheduled()) {
|
||||
assert(_status != SwitchedOut);
|
||||
tickEvent.schedule(curTick + cycles(1));
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
|
|||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
|
@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
|
|||
"terminate when any thread reaches this load count"),
|
||||
INIT_PARAM(max_loads_all_threads,
|
||||
"terminate when all threads have reached this load count"),
|
||||
INIT_PARAM(stats_reset_inst,
|
||||
"instruction to reset stats on"),
|
||||
INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0),
|
||||
|
||||
#if FULL_SYSTEM
|
||||
|
@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
|
|||
params->max_insts_all_threads = max_insts_all_threads;
|
||||
params->max_loads_any_thread = max_loads_any_thread;
|
||||
params->max_loads_all_threads = max_loads_all_threads;
|
||||
params->stats_reset_inst = stats_reset_inst;
|
||||
params->deferRegistration = defer_registration;
|
||||
params->clock = clock;
|
||||
params->functionTrace = function_trace;
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "sim/host.hh" // for Tick
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "sim/serialize.hh"
|
||||
|
||||
|
@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc
|
|||
/// same cycle (after unscheduling the old CPU's tick event).
|
||||
/// The switch needs to come before any tick events to make
|
||||
/// sure we don't tick both CPUs in the same cycle.
|
||||
CPU_Switch_Pri = 31,
|
||||
CPU_Switch_Pri = -31,
|
||||
|
||||
/// Serialization needs to occur before tick events also, so
|
||||
/// that a serialize/unserialize is identical to an on-line
|
||||
|
@ -344,7 +345,8 @@ inline void
|
|||
Event::schedule(Tick t)
|
||||
{
|
||||
assert(!scheduled());
|
||||
assert(t >= curTick);
|
||||
// if (t < curTick)
|
||||
// warn("t is less than curTick, ensure you don't want cycles");
|
||||
|
||||
setFlags(Scheduled);
|
||||
#if TRACING_ON
|
||||
|
|
Loading…
Reference in a new issue