Switch out fixups for the CPUs.

cpu/cpu_exec_context.cc:
    Be sure to switch over the kernel stats from the old context so they don't get messed up.  This may lead to odd-looking stats files for sampling runs (detailed stats should be correct, regardless of which kernel stats this is defined on).
cpu/o3/cpu.cc:
    Updates for switching out.  Also include a bunch of debug info if needed.
cpu/o3/fetch_impl.hh:
    Switch out properly.
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
    Comment out unused stats (they made the stats file huge).
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
    Add in new stat.
cpu/o3/rename.hh:
    Fix up for switching out.
cpu/o3/rename_impl.hh:
    Fix up for switching out.  Be sure to mark any misc regs as ready if the instruction that renamed them was squashed because of the switch out.
cpu/ozone/cpu_impl.hh:
cpu/simple/cpu.cc:
    Switch out fixup.
sim/eventq.hh:
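    Make CPU switching more immediate (CPU_Switch_Pri changes from 31 to -31).
    Also comment out the assertion in Event::schedule() that the scheduled time is not before curTick, as it doesn't apply if the event is being put on an inst-based queue.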

--HG--
extra : convert_revision : f40ed40604738993f061e0c628810ff37a920562
This commit is contained in:
Kevin Lim 2006-08-24 17:29:34 -04:00
parent 5da3f70560
commit 74e8abd37e
12 changed files with 142 additions and 31 deletions

View file

@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext)
if (quiesceEvent) {
quiesceEvent->xc = proxy;
}
Kernel::Statistics *stats = oldContext->getKernelStats();
if (stats) {
kernelStats = stats;
}
#endif
storeCondFailures = 0;

View file

@ -599,8 +599,11 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
// Be sure to signal that there's some activity so the CPU doesn't
// deschedule itself.
activityRec.activity();
#if FULL_SYSTEM
if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled())
thread[tid]->quiesceEvent->deschedule();
#endif
fetch.wakeFromQuiesce();
@ -671,6 +674,8 @@ template <class Impl>
void
FullO3CPU<Impl>::switchOut(Sampler *_sampler)
{
DPRINTF(FullCPU, "Switching out\n");
BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
fetch.switchOut();
@ -694,6 +699,41 @@ FullO3CPU<Impl>::signalSwitched()
rename.doSwitchOut();
commit.doSwitchOut();
instList.clear();
#ifndef NDEBUG
PhysRegIndex renamed_reg;
// First loop through the integer registers.
for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
renamed_reg = renameMap[0].lookup(i);
assert(renamed_reg == commitRenameMap[0].lookup(i));
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
renamed_reg);
assert(scoreboard.getReg(renamed_reg));
}
// Then loop through the floating point registers.
for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag);
assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag));
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
renamed_reg);
assert(scoreboard.getReg(renamed_reg));
}
for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) {
renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs;
DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n",
renamed_reg);
assert(scoreboard.getReg(renamed_reg));
}
#endif
while (!removeList.empty()) {
removeList.pop();
}

View file

@ -391,6 +391,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
interruptPending = false;
branchPred.takeOverFrom();
}
@ -469,7 +470,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
if (interruptPending && flags == 0) {
if (isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;

View file

@ -474,11 +474,11 @@ class InstructionQueue
Stats::Scalar<> iqSquashedNonSpecRemoved;
/** Distribution of number of instructions in the queue. */
Stats::VectorDistribution<> queueResDist;
// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction. */
Stats::VectorDistribution<> issueDelayDist;
// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.

View file

@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")

View file

@ -382,6 +382,9 @@ class LSQUnit {
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
/** Total number of memory ordering violations. */
Stats::Scalar<> lsqMemOrderViolation;
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;

View file

@ -144,6 +144,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
lsqMemOrderViolation
.name(name() + ".memOrderViolation")
.desc("Number of memory ordering violations");
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@ -495,6 +499,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
++lsqMemOrderViolation;
return genMachineCheckFault();
}

View file

@ -411,6 +411,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
PhysRegIndex maxPhysicalRegs;
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is prioritized in that order.
*/

View file

@ -40,7 +40,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads)
numThreads(params->numberOfThreads),
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@ -283,6 +284,11 @@ DefaultRename<Impl>::doSwitchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
// Be sure to mark its register as ready if it's a misc register.
if (hb_it->newPhysReg >= maxPhysicalRegs) {
scoreboard->setReg(hb_it->newPhysReg);
}
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();

View file

@ -184,7 +184,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
globalSeqNum = 1;
#if FULL_SYSTEM
checkInterrupts = false;
#endif
lockFlag = 0;
@ -213,6 +215,7 @@ template <class Impl>
void
OzoneCPU<Impl>::switchOut(Sampler *_sampler)
{
BaseCPU::switchOut(_sampler);
sampler = _sampler;
switchCount = 0;
// Front end needs state from back end, so switch out the back end first.
@ -234,6 +237,16 @@ OzoneCPU<Impl>::signalSwitched()
checker->switchOut(sampler);
_status = SwitchedOut;
#ifndef NDEBUG
// Loop through all registers
for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
assert(thread.renameTable[i] == frontEnd->renameTable[i]);
assert(thread.renameTable[i] == backEnd->renameTable[i]);
DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
}
#endif
if (tickEvent.scheduled())
tickEvent.squash();
@ -256,9 +269,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
frontEnd->takeOverFrom();
assert(!tickEvent.scheduled());
#ifndef NDEBUG
// Check rename table.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
assert(thread.renameTable[i]->isResultReady());
}
#endif
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
for (int i = 0; i < 6; ++i) {
for (int i = 0; i < 15; ++i) {
comm.advance();
}
@ -291,8 +311,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
scheduleTickEvent(delay);
_status = Running;
thread._status = ExecContext::Active;
#if FULL_SYSTEM
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
thread.quiesceEvent->deschedule();
#endif
frontEnd->wakeFromQuiesce();
}
@ -369,7 +391,7 @@ template <class Impl>
void
OzoneCPU<Impl>::resetStats()
{
startNumInst = numInst;
// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@ -777,7 +799,9 @@ OzoneCPU<Impl>::OzoneXC::halt()
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
{ }
{
thread->dumpFuncProfile();
}
#endif
template <class Impl>
@ -797,6 +821,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
thread->inst = old_context->getInst();
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#else
@ -869,16 +894,14 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileClear()
{
if (thread->profile)
thread->profile->clear();
thread->profileClear();
}
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::profileSample()
{
if (thread->profile)
thread->profile->sample(thread->profileNode, thread->profilePC);
thread->profileSample();
}
#endif
@ -906,14 +929,20 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
cpu->frontEnd->setPC(thread->PC);
cpu->frontEnd->setNextPC(thread->nextPC);
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
if (i < TheISA::FP_Base_DepTag) {
thread->renameTable[i]->setIntResult(xc->readIntReg(i));
} else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
int fp_idx = i - TheISA::FP_Base_DepTag;
thread->renameTable[i]->setDoubleResult(
xc->readFloatRegDouble(fp_idx));
}
// First loop through the integer registers.
for (int i = 0; i < TheISA::NumIntRegs; ++i) {
/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
"now has data %lli.\n",
i, thread->renameTable[i]->readIntResult(),
xc->readIntReg(i));
*/
thread->renameTable[i]->setIntResult(xc->readIntReg(i));
}
// Then loop through the floating point registers.
for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
int fp_idx = i + TheISA::FP_Base_DepTag;
thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i));
}
#if !FULL_SYSTEM

View file

@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s)
_status = SwitchedOut;
if (tickEvent.scheduled())
tickEvent.squash();
tickEvent.deschedule();
assert(!tickEvent.scheduled());
sampler->signalSwitched();
}
@ -294,7 +296,7 @@ SimpleCPU::regStats()
void
SimpleCPU::resetStats()
{
startNumInst = numInst;
// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src)
Fault fault = cpuXC->translateDataReadReq(memReq);
if (fault == NoFault) {
panic("We can't copy!");
cpuXC->copySrcAddr = src;
cpuXC->copySrcPhysAddr = memReq->paddr + offset;
} else {
@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr)
void
SimpleCPU::processCacheCompletion()
{
Fault fault;
switch (status()) {
case IcacheMissStall:
icacheStallCycles += curTick - lastIcacheStall;
@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion()
break;
case DcacheMissSwitch:
if (memReq->cmd.isRead()) {
curStaticInst->execute(this,traceData);
fault = curStaticInst->execute(this,traceData);
if (traceData)
traceData->finalize();
} else {
fault = NoFault;
}
assert(fault == NoFault);
assert(!tickEvent.scheduled());
_status = SwitchedOut;
sampler->signalSwitched();
return;
case SwitchedOut:
// If this CPU has been switched out due to sampling/warm-up,
// ignore any further status changes (e.g., due to cache
@ -787,9 +797,10 @@ SimpleCPU::tick()
}
if (cpuXC->profile) {
bool usermode =
(cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
// bool usermode =
// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC();
cpuXC->profilePC = cpuXC->readPC();
ProfileNode *node = cpuXC->profile->consume(xcProxy, inst);
if (node)
cpuXC->profileNode = node;
@ -849,8 +860,10 @@ SimpleCPU::tick()
status() == Idle ||
status() == DcacheMissStall);
if (status() == Running && !tickEvent.scheduled())
if (status() == Running && !tickEvent.scheduled()) {
assert(_status != SwitchedOut);
tickEvent.schedule(curTick + cycles(1));
}
}
////////////////////////////////////////////////////////////////////////
@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(stats_reset_inst,
"instruction to reset stats on"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0),
#if FULL_SYSTEM
@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
params->stats_reset_inst = stats_reset_inst;
params->deferRegistration = defer_registration;
params->clock = clock;
params->functionTrace = function_trace;

View file

@ -43,6 +43,7 @@
#include "sim/host.hh" // for Tick
#include "base/fast_alloc.hh"
#include "base/misc.hh"
#include "base/trace.hh"
#include "sim/serialize.hh"
@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc
/// same cycle (after unscheduling the old CPU's tick event).
/// The switch needs to come before any tick events to make
/// sure we don't tick both CPUs in the same cycle.
CPU_Switch_Pri = 31,
CPU_Switch_Pri = -31,
/// Serialization needs to occur before tick events also, so
/// that a serialize/unserialize is identical to an on-line
@ -344,7 +345,8 @@ inline void
Event::schedule(Tick t)
{
assert(!scheduled());
assert(t >= curTick);
// if (t < curTick)
// warn("t is less than curTick, ensure you don't want cycles");
setFlags(Scheduled);
#if TRACING_ON