diff --git a/cpu/cpu_exec_context.cc b/cpu/cpu_exec_context.cc index 0dcf149fd..9f151dd6a 100644 --- a/cpu/cpu_exec_context.cc +++ b/cpu/cpu_exec_context.cc @@ -183,6 +183,11 @@ CPUExecContext::takeOverFrom(ExecContext *oldContext) if (quiesceEvent) { quiesceEvent->xc = proxy; } + + Kernel::Statistics *stats = oldContext->getKernelStats(); + if (stats) { + kernelStats = stats; + } #endif storeCondFailures = 0; diff --git a/cpu/o3/cpu.cc b/cpu/o3/cpu.cc index 0025d4144..88de6c746 100644 --- a/cpu/o3/cpu.cc +++ b/cpu/o3/cpu.cc @@ -599,8 +599,11 @@ FullO3CPU::activateContext(int tid, int delay) // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. activityRec.activity(); + +#if FULL_SYSTEM if (thread[tid]->quiesceEvent && thread[tid]->quiesceEvent->scheduled()) thread[tid]->quiesceEvent->deschedule(); +#endif fetch.wakeFromQuiesce(); @@ -671,6 +674,8 @@ template void FullO3CPU::switchOut(Sampler *_sampler) { + DPRINTF(FullCPU, "Switching out\n"); + BaseCPU::switchOut(_sampler); sampler = _sampler; switchCount = 0; fetch.switchOut(); @@ -694,6 +699,41 @@ FullO3CPU::signalSwitched() rename.doSwitchOut(); commit.doSwitchOut(); instList.clear(); + +#ifndef NDEBUG + PhysRegIndex renamed_reg; + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { + renamed_reg = renameMap[0].lookup(i); + assert(renamed_reg == commitRenameMap[0].lookup(i)); + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } + + // Then loop through the floating point registers. 
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { + renamed_reg = renameMap[0].lookup(i + AlphaISA::FP_Base_DepTag); + assert(renamed_reg == commitRenameMap[0].lookup(i + AlphaISA::FP_Base_DepTag)); + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } + + for (int i = 0; i < AlphaISA::NumMiscRegs; ++i) { + renamed_reg = i + ((Params *)params)->numPhysFloatRegs + ((Params *)params)->numPhysIntRegs; + + DPRINTF(FullCPU, "FullCPU: Checking if register %i is ready.\n", + renamed_reg); + + assert(scoreboard.getReg(renamed_reg)); + } +#endif + while (!removeList.empty()) { removeList.pop(); } diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh index cc09c4a41..7a3292dbe 100644 --- a/cpu/o3/fetch_impl.hh +++ b/cpu/o3/fetch_impl.hh @@ -391,6 +391,7 @@ DefaultFetch::takeOverFrom() wroteToTimeBuffer = false; _status = Inactive; switchedOut = false; + interruptPending = false; branchPred.takeOverFrom(); } @@ -469,7 +470,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (interruptPending && flags == 0) { + if (isSwitchedOut() || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions while an interrupt // is pending. return false; diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh index 80cd71f0d..e96fbc667 100644 --- a/cpu/o3/inst_queue.hh +++ b/cpu/o3/inst_queue.hh @@ -474,11 +474,11 @@ class InstructionQueue Stats::Scalar<> iqSquashedNonSpecRemoved; /** Distribution of number of instructions in the queue. */ - Stats::VectorDistribution<> queueResDist; +// Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; /** Distribution of the cycles it takes to issue an instruction. 
*/ - Stats::VectorDistribution<> issueDelayDist; +// Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a * FU was busy. diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh index 72cb0d708..b6b06ca77 100644 --- a/cpu/o3/inst_queue_impl.hh +++ b/cpu/o3/inst_queue_impl.hh @@ -230,7 +230,7 @@ InstructionQueue::regStats() .name(name() + ".iqSquashedNonSpecRemoved") .desc("Number of squashed non-spec instructions that were removed") .prereq(iqSquashedNonSpecRemoved); - +/* queueResDist .init(Num_OpClasses, 0, 99, 2) .name(name() + ".IQ:residence:") @@ -240,6 +240,7 @@ InstructionQueue::regStats() for (int i = 0; i < Num_OpClasses; ++i) { queueResDist.subname(i, opClassStrings[i]); } +*/ numIssuedDist .init(0,totalWidth,1) .name(name() + ".ISSUE:issued_per_cycle") @@ -268,7 +269,7 @@ InstructionQueue::regStats() // // How long did instructions for a particular FU type wait prior to issue // - +/* issueDelayDist .init(Num_OpClasses,0,99,2) .name(name() + ".ISSUE:") @@ -281,7 +282,7 @@ InstructionQueue::regStats() subname << opClassStrings[i] << "_delay"; issueDelayDist.subname(i, subname.str()); } - +*/ issueRate .name(name() + ".ISSUE:rate") .desc("Inst issue rate") diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh index fe174a97d..1db6dc02d 100644 --- a/cpu/o3/lsq_unit.hh +++ b/cpu/o3/lsq_unit.hh @@ -382,6 +382,9 @@ class LSQUnit { * ignored due to the instruction already being squashed. */ Stats::Scalar<> lsqIgnoredResponses; + /** Total number of memory ordering violations. */ + Stats::Scalar<> lsqMemOrderViolation; + /** Total number of squashed stores. 
*/ Stats::Scalar<> lsqSquashedStores; diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh index 5cc3078f8..7086c381e 100644 --- a/cpu/o3/lsq_unit_impl.hh +++ b/cpu/o3/lsq_unit_impl.hh @@ -144,6 +144,10 @@ LSQUnit::regStats() .name(name() + ".ignoredResponses") .desc("Number of memory responses ignored because the instruction is squashed"); + lsqMemOrderViolation + .name(name() + ".memOrderViolation") + .desc("Number of memory ordering violations"); + lsqSquashedStores .name(name() + ".squashedStores") .desc("Number of stores squashed"); @@ -495,6 +499,7 @@ LSQUnit::executeStore(DynInstPtr &store_inst) // A load incorrectly passed this store. Squash and refetch. // For now return a fault to show that it was unsuccessful. memDepViolator = loadQueue[load_idx]; + ++lsqMemOrderViolation; return genMachineCheckFault(); } diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh index 4912431ad..5769dbd37 100644 --- a/cpu/o3/rename.hh +++ b/cpu/o3/rename.hh @@ -411,6 +411,8 @@ class DefaultRename /** The maximum skid buffer size. */ unsigned skidBufferMax; + PhysRegIndex maxPhysicalRegs; + /** Enum to record the source of a structure full stall. Can come from * either ROB, IQ, LSQ, and it is priortized in that order. */ diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh index 93f5b3504..49627e3d4 100644 --- a/cpu/o3/rename_impl.hh +++ b/cpu/o3/rename_impl.hh @@ -40,7 +40,8 @@ DefaultRename::DefaultRename(Params *params) commitToRenameDelay(params->commitToRenameDelay), renameWidth(params->renameWidth), commitWidth(params->commitWidth), - numThreads(params->numberOfThreads) + numThreads(params->numberOfThreads), + maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs) { _status = Inactive; @@ -283,6 +284,11 @@ DefaultRename::doSwitchOut() // Put the renamed physical register back on the free list. freeList->addReg(hb_it->newPhysReg); + // Be sure to mark its register as ready if it's a misc register. 
+ if (hb_it->newPhysReg >= maxPhysicalRegs) { + scoreboard->setReg(hb_it->newPhysReg); + } + historyBuffer[i].erase(hb_it++); } insts[i].clear(); diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 050bdb9a3..1a0de29f5 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -184,7 +184,9 @@ OzoneCPU::OzoneCPU(Params *p) globalSeqNum = 1; +#if FULL_SYSTEM checkInterrupts = false; +#endif lockFlag = 0; @@ -213,6 +215,7 @@ template void OzoneCPU::switchOut(Sampler *_sampler) { + BaseCPU::switchOut(_sampler); sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. @@ -234,6 +237,16 @@ OzoneCPU::signalSwitched() checker->switchOut(sampler); _status = SwitchedOut; +#ifndef NDEBUG + // Loop through all registers + for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) { + assert(thread.renameTable[i] == frontEnd->renameTable[i]); + + assert(thread.renameTable[i] == backEnd->renameTable[i]); + + DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i); + } +#endif if (tickEvent.scheduled()) tickEvent.squash(); @@ -256,9 +269,16 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) frontEnd->takeOverFrom(); assert(!tickEvent.scheduled()); +#ifndef NDEBUG + // Check rename table. + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + assert(thread.renameTable[i]->isResultReady()); + } +#endif + // @todo: Fix hardcoded number // Clear out any old information in time buffer. 
- for (int i = 0; i < 6; ++i) { + for (int i = 0; i < 15; ++i) { comm.advance(); } @@ -291,8 +311,10 @@ OzoneCPU::activateContext(int thread_num, int delay) scheduleTickEvent(delay); _status = Running; thread._status = ExecContext::Active; +#if FULL_SYSTEM if (thread.quiesceEvent && thread.quiesceEvent->scheduled()) thread.quiesceEvent->deschedule(); +#endif frontEnd->wakeFromQuiesce(); } @@ -369,7 +391,7 @@ template void OzoneCPU::resetStats() { - startNumInst = numInst; +// startNumInst = numInst; notIdleFraction = (_status != Idle); } @@ -777,7 +799,9 @@ OzoneCPU::OzoneXC::halt() template void OzoneCPU::OzoneXC::dumpFuncProfile() -{ } +{ + thread->dumpFuncProfile(); +} #endif template @@ -797,6 +821,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) copyArchRegs(old_context); setCpuId(old_context->readCpuId()); + thread->inst = old_context->getInst(); #if !FULL_SYSTEM setFuncExeInst(old_context->readFuncExeInst()); #else @@ -869,16 +894,14 @@ template void OzoneCPU::OzoneXC::profileClear() { - if (thread->profile) - thread->profile->clear(); + thread->profileClear(); } template void OzoneCPU::OzoneXC::profileSample() { - if (thread->profile) - thread->profile->sample(thread->profileNode, thread->profilePC); + thread->profileSample(); } #endif @@ -906,14 +929,20 @@ OzoneCPU::OzoneXC::copyArchRegs(ExecContext *xc) cpu->frontEnd->setPC(thread->PC); cpu->frontEnd->setNextPC(thread->nextPC); - for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - if (i < TheISA::FP_Base_DepTag) { - thread->renameTable[i]->setIntResult(xc->readIntReg(i)); - } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { - int fp_idx = i - TheISA::FP_Base_DepTag; - thread->renameTable[i]->setDoubleResult( - xc->readFloatRegDouble(fp_idx)); - } + // First loop through the integer registers. 
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) { +/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + i, thread->renameTable[i]->readIntResult(), + xc->readIntReg(i)); +*/ + thread->renameTable[i]->setIntResult(xc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + int fp_idx = i + TheISA::FP_Base_DepTag; + thread->renameTable[fp_idx]->setIntResult(xc->readFloatRegInt(i)); } #if !FULL_SYSTEM diff --git a/cpu/simple/cpu.cc b/cpu/simple/cpu.cc index 0a4b3c3e4..eb19115b2 100644 --- a/cpu/simple/cpu.cc +++ b/cpu/simple/cpu.cc @@ -181,7 +181,9 @@ SimpleCPU::switchOut(Sampler *s) _status = SwitchedOut; if (tickEvent.scheduled()) - tickEvent.squash(); + tickEvent.deschedule(); + + assert(!tickEvent.scheduled()); sampler->signalSwitched(); } @@ -294,7 +296,7 @@ SimpleCPU::regStats() void SimpleCPU::resetStats() { - startNumInst = numInst; +// startNumInst = numInst; notIdleFraction = (_status != Idle); } @@ -352,6 +354,7 @@ SimpleCPU::copySrcTranslate(Addr src) Fault fault = cpuXC->translateDataReadReq(memReq); if (fault == NoFault) { + panic("We can't copy!"); cpuXC->copySrcAddr = src; cpuXC->copySrcPhysAddr = memReq->paddr + offset; } else { @@ -600,6 +603,8 @@ SimpleCPU::dbg_vtophys(Addr addr) void SimpleCPU::processCacheCompletion() { + Fault fault; + switch (status()) { case IcacheMissStall: icacheStallCycles += curTick - lastIcacheStall; @@ -618,12 +623,17 @@ SimpleCPU::processCacheCompletion() break; case DcacheMissSwitch: if (memReq->cmd.isRead()) { - curStaticInst->execute(this,traceData); + fault = curStaticInst->execute(this,traceData); if (traceData) traceData->finalize(); + } else { + fault = NoFault; } + assert(fault == NoFault); + assert(!tickEvent.scheduled()); _status = SwitchedOut; sampler->signalSwitched(); + return; case SwitchedOut: // If this CPU has been switched out due to sampling/warm-up, // ignore any further status changes (e.g., 
due to cache @@ -787,9 +797,10 @@ SimpleCPU::tick() } if (cpuXC->profile) { - bool usermode = - (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; - cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); +// bool usermode = +// (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; +// cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); + cpuXC->profilePC = cpuXC->readPC(); ProfileNode *node = cpuXC->profile->consume(xcProxy, inst); if (node) cpuXC->profileNode = node; @@ -849,8 +860,10 @@ SimpleCPU::tick() status() == Idle || status() == DcacheMissStall); - if (status() == Running && !tickEvent.scheduled()) + if (status() == Running && !tickEvent.scheduled()) { + assert(_status != SwitchedOut); tickEvent.schedule(curTick + cycles(1)); + } } //////////////////////////////////////////////////////////////////////// @@ -863,6 +876,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU) Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; + Param stats_reset_inst; Param progress_interval; #if FULL_SYSTEM @@ -897,6 +911,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU) "terminate when any thread reaches this load count"), INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), + INIT_PARAM(stats_reset_inst, + "instruction to reset stats on"), INIT_PARAM_DFLT(progress_interval, "CPU Progress interval", 0), #if FULL_SYSTEM @@ -930,6 +946,7 @@ CREATE_SIM_OBJECT(SimpleCPU) params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; params->max_loads_all_threads = max_loads_all_threads; + params->stats_reset_inst = stats_reset_inst; params->deferRegistration = defer_registration; params->clock = clock; params->functionTrace = function_trace; diff --git a/sim/eventq.hh b/sim/eventq.hh index 5fc73bb53..b9a0abc12 100644 --- a/sim/eventq.hh +++ b/sim/eventq.hh @@ -43,6 +43,7 @@ #include "sim/host.hh" // for Tick #include "base/fast_alloc.hh" +#include "base/misc.hh" 
#include "base/trace.hh" #include "sim/serialize.hh" @@ -131,7 +132,7 @@ class Event : public Serializable, public FastAlloc /// same cycle (after unscheduling the old CPU's tick event). /// The switch needs to come before any tick events to make /// sure we don't tick both CPUs in the same cycle. - CPU_Switch_Pri = 31, + CPU_Switch_Pri = -31, /// Serailization needs to occur before tick events also, so /// that a serialize/unserialize is identical to an on-line @@ -344,7 +345,8 @@ inline void Event::schedule(Tick t) { assert(!scheduled()); - assert(t >= curTick); +// if (t < curTick) +// warn("t is less than curTick, ensure you don't want cycles"); setFlags(Scheduled); #if TRACING_ON