diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh index c1afb4720..3f83f9bef 100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/o3/bpred_unit.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -88,8 +88,10 @@ class BPredUnit */ void regStats(); - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; + /** Take over execution from another CPU's thread. */ void takeOverFrom(); /** diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 74fe27a6a..43e801710 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -131,12 +131,12 @@ BPredUnit::regStats() template void -BPredUnit::switchOut() +BPredUnit::drainSanityCheck() const { - // Clear any state upon switch out. - for (int i = 0; i < Impl::MaxThreads; ++i) { - squash(0, i); - } + // We shouldn't have any outstanding requests when we resume from + // a drained system. + for (int i = 0; i < Impl::MaxThreads; ++i) + assert(predHist[i].empty()); } template diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 2d8d88b21..c76d6c1d0 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -199,13 +199,16 @@ class DefaultCommit void startupStage(); /** Initializes the draining of commit. */ - bool drain(); + void drain(); /** Resumes execution after draining. */ - void resume(); + void drainResume(); - /** Completes the switch out of commit. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; + + /** Has the stage drained? */ + bool isDrained() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -438,9 +441,6 @@ class DefaultCommit /** Is a drain pending. */ bool drainPending; - /** Is commit switched out. */ - bool switchedOut; - /** The latency to handle a trap. Used when scheduling trap * squash event. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 333ccc89f..ea709e92c 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -57,6 +57,7 @@ #include "debug/Activity.hh" #include "debug/Commit.hh" #include "debug/CommitRate.hh" +#include "debug/Drain.hh" #include "debug/ExecFaulting.hh" #include "params/DerivO3CPU.hh" #include "sim/faults.hh" @@ -99,7 +100,6 @@ DefaultCommit::DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params) commitWidth(params->commitWidth), numThreads(params->numThreads), drainPending(false), - switchedOut(false), trapLatency(params->trapLatency), canHandleInterrupts(true) { @@ -369,35 +369,59 @@ DefaultCommit::startupStage() } template -bool +void DefaultCommit::drain() { drainPending = true; - - return false; } template void -DefaultCommit::switchOut() +DefaultCommit::drainResume() { - switchedOut = true; drainPending = false; - rob->switchOut(); } template void -DefaultCommit::resume() +DefaultCommit::drainSanityCheck() const { - drainPending = false; + assert(isDrained()); + rob->drainSanityCheck(); +} + +template +bool +DefaultCommit::isDrained() const +{ + /* Make sure no one is executing microcode. There are two reasons + * for this: + * - Hardware virtualized CPUs can't switch into the middle of a + * microcode sequence. + * - The current fetch implementation will most likely get very + * confused if it tries to start fetching an instruction that + * is executing in the middle of a ucode sequence that changes + * address mappings. This can happen on for example x86. + */ + for (ThreadID tid = 0; tid < numThreads; tid++) { + if (pc[tid].microPC() != 0) + return false; + } + + /* Make sure that all instructions have finished committing before + * declaring the system as drained. We want the pipeline to be + * completely empty when we declare the CPU to be drained. This + * makes debugging easier since CPU handover and restoring from a + * checkpoint with a different CPU should have the same timing. + */ + return rob->isEmpty() && + interrupt == NoFault; } template void DefaultCommit::takeOverFrom() { - switchedOut = false; _status = Active; _nextStatus = Inactive; for (ThreadID tid = 0; tid < numThreads; tid++) { @@ -624,13 +648,6 @@ DefaultCommit::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (drainPending && cpu->instList.empty() && !iewStage->hasStoresToWB() && - interrupt == NoFault) { - cpu->signalDrained(); - drainPending = false; - return; - } - if (activeThreads->empty()) return; @@ -1018,6 +1035,14 @@ DefaultCommit::commitInsts() if (head_inst->isSquashAfter()) squashAfter(tid, head_inst); + if (drainPending) { + DPRINTF(Drain, "Draining: %i:%s\n", tid, pc[tid]); + if (pc[tid].microPC() == 0 && interrupt == NoFault) { + squashAfter(tid, head_inst); + cpu->commitDrained(tid); + } + } + int count = 0; Addr oldpc; // Debug statement. Checks to make sure we're not diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 724d88405..cb17581e5 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -257,7 +257,7 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) globalSeqNum(1), system(params->system), - drainCount(0), + drainManager(NULL), lastRunningCycle(curCycle()) { if (!params->switched_out) { @@ -584,6 +584,8 @@ void FullO3CPU::tick() { DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); + assert(!switchedOut()); + assert(getDrainState() != Drainable::Drained); ++numCycles; @@ -618,8 +620,7 @@ FullO3CPU::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut || - getDrainState() == Drainable::Drained) { + if (_status == SwitchedOut) { DPRINTF(O3CPU, "Switched out!\n"); // increment stat lastRunningCycle = curCycle(); @@ -635,6 +636,8 @@ FullO3CPU::tick() if (!FullSystem) updateThreadPriority(); + + tryDrain(); } template @@ -657,13 +660,6 @@ FullO3CPU::init() thread[tid]->initMemProxies(thread[tid]->getTC()); } - // this CPU could still be unconnected if we are restoring from a - // checkpoint and this CPU is to be switched in, thus we can only - // do this here if the instruction port is actually connected, if - // not we have to do it as part of takeOverFrom - if (icachePort.isConnected()) - fetch.setIcache(); - if (FullSystem && !params()->switched_out) { for (ThreadID tid = 0; tid < numThreads; tid++) { ThreadContext *src_tc = threadContexts[tid]; @@ -683,6 +679,7 @@ void FullO3CPU::startup() { fetch.startupStage(); + decode.startupStage(); iew.startupStage(); rename.startupStage(); commit.startupStage(); @@ -696,6 +693,7 @@ FullO3CPU::activateThread(ThreadID tid) std::find(activeThreads.begin(), activeThreads.end(), tid); DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid); + assert(!switchedOut()); if (isActive == activeThreads.end()) { DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", @@ -714,6 +712,7 @@ FullO3CPU::deactivateThread(ThreadID tid) std::find(activeThreads.begin(), activeThreads.end(), tid); DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid); + assert(!switchedOut()); if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", @@ -752,6 +751,8 @@ template void FullO3CPU::activateContext(ThreadID tid, Cycles delay) { + assert(!switchedOut()); + // Needs to set each stage to running as well. if (delay){ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " @@ -761,6 +762,12 @@ FullO3CPU::activateContext(ThreadID tid, Cycles delay) activateThread(tid); } + // We don't want to wake the CPU if it is drained. In that case, + // we just want to flag the thread as active and schedule the tick + // event from drainResume() instead. + if (getDrainState() == Drainable::Drained) + return; + // If we are time 0 or if the last activation time is in the past, // schedule the next tick and wake up the fetch unit if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) { @@ -807,6 +814,7 @@ void FullO3CPU::suspendContext(ThreadID tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + assert(!switchedOut()); bool deallocated = scheduleDeallocateContext(tid, false, Cycles(1)); // If this was the last thread then unschedule the tick event. if ((activeThreads.size() == 1 && !deallocated) || @@ -824,6 +832,7 @@ FullO3CPU::haltContext(ThreadID tid) { //For now, this is the same as deallocate DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + assert(!switchedOut()); scheduleDeallocateContext(tid, true, Cycles(1)); } @@ -1120,26 +1129,26 @@ template unsigned int FullO3CPU::drain(DrainManager *drain_manager) { - DPRINTF(O3CPU, "Switching out\n"); - // If the CPU isn't doing anything, then return immediately. - if (_status == SwitchedOut) + if (switchedOut()) { + setDrainState(Drainable::Drained); return 0; + } - drainCount = 0; - fetch.drain(); - decode.drain(); - rename.drain(); - iew.drain(); + DPRINTF(Drain, "Draining...\n"); + setDrainState(Drainable::Draining); + + // We only need to signal a drain to the commit stage as this + // initiates squashing controls the draining. Once the commit + // stage commits an instruction where it is safe to stop, it'll + // squash the rest of the instructions in the pipeline and force + // the fetch stage to stall. The pipeline will be drained once all + // in-flight instructions have retired. commit.drain(); // Wake the CPU and record activity so everything can drain out if // the CPU was not able to immediately drain. - if (getDrainState() != Drainable::Drained) { - // A bit of a hack...set the drainManager after all the drain() - // calls have been made, that way if all of the stages drain - // immediately, the signalDrained() function knows not to call - // process on the drain event. + if (!isDrained()) { drainManager = drain_manager; wakeCPU(); @@ -1149,93 +1158,167 @@ FullO3CPU::drain(DrainManager *drain_manager) return 1; } else { + setDrainState(Drainable::Drained); + DPRINTF(Drain, "CPU is already drained\n"); + if (tickEvent.scheduled()) + deschedule(tickEvent); + + // Flush out any old data from the time buffers. In + // particular, there might be some data in flight from the + // fetch stage that isn't visible in any of the CPU buffers we + // test in isDrained(). + for (int i = 0; i < timeBuffer.getSize(); ++i) { + timeBuffer.advance(); + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + } + + drainSanityCheck(); return 0; } } +template +bool +FullO3CPU::tryDrain() +{ + if (!drainManager || !isDrained()) + return false; + + if (tickEvent.scheduled()) + deschedule(tickEvent); + + DPRINTF(Drain, "CPU done draining, processing drain event\n"); + drainManager->signalDrainDone(); + drainManager = NULL; + + return true; +} + +template +void +FullO3CPU::drainSanityCheck() const +{ + assert(isDrained()); + fetch.drainSanityCheck(); + decode.drainSanityCheck(); + rename.drainSanityCheck(); + iew.drainSanityCheck(); + commit.drainSanityCheck(); +} + +template +bool +FullO3CPU::isDrained() const +{ + bool drained(true); + + for (ThreadID i = 0; i < thread.size(); ++i) { + if (activateThreadEvent[i].scheduled()) { + DPRINTF(Drain, "CPU not drained, tread %i has a " + "pending activate event\n", i); + drained = false; + } + if (deallocateContextEvent[i].scheduled()) { + DPRINTF(Drain, "CPU not drained, tread %i has a " + "pending deallocate context event\n", i); + drained = false; + } + } + + if (!instList.empty() || !removeList.empty()) { + DPRINTF(Drain, "Main CPU structures not drained.\n"); + drained = false; + } + + if (!fetch.isDrained()) { + DPRINTF(Drain, "Fetch not drained.\n"); + drained = false; + } + + if (!decode.isDrained()) { + DPRINTF(Drain, "Decode not drained.\n"); + drained = false; + } + + if (!rename.isDrained()) { + DPRINTF(Drain, "Rename not drained.\n"); + drained = false; + } + + if (!iew.isDrained()) { + DPRINTF(Drain, "IEW not drained.\n"); + drained = false; + } + + if (!commit.isDrained()) { + DPRINTF(Drain, "Commit not drained.\n"); + drained = false; + } + + return drained; +} + +template +void +FullO3CPU::commitDrained(ThreadID tid) +{ + fetch.drainStall(tid); +} + template void FullO3CPU::drainResume() { - fetch.resume(); - decode.resume(); - rename.resume(); - iew.resume(); - commit.resume(); - setDrainState(Drainable::Running); - - if (_status == SwitchedOut) + if (switchedOut()) return; + DPRINTF(Drain, "Resuming...\n"); + if (system->getMemoryMode() != Enums::timing) { fatal("The O3 CPU requires the memory system to be in " "'timing' mode.\n"); } - if (!tickEvent.scheduled()) - schedule(tickEvent, nextCycle()); - _status = Running; -} + fetch.drainResume(); + commit.drainResume(); -template -void -FullO3CPU::signalDrained() -{ - if (++drainCount == NumStages) { - if (tickEvent.scheduled()) - tickEvent.squash(); - - setDrainState(Drainable::Drained); - - if (drainManager) { - DPRINTF(Drain, "CPU done draining, processing drain event\n"); - drainManager->signalDrainDone(); - drainManager = NULL; + _status = Idle; + for (ThreadID i = 0; i < thread.size(); i++) { + if (thread[i]->status() == ThreadContext::Active) { + DPRINTF(Drain, "Activating thread: %i\n", i); + activateThread(i); + _status = Running; } } - assert(drainCount <= 5); + + assert(!tickEvent.scheduled()); + if (_status == Running) + schedule(tickEvent, nextCycle()); } template void FullO3CPU::switchOut() { + DPRINTF(O3CPU, "Switching out\n"); BaseCPU::switchOut(); - fetch.switchOut(); - rename.switchOut(); - iew.switchOut(); - commit.switchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } + activityRec.reset(); _status = SwitchedOut; if (checker) checker->switchOut(); - - if (tickEvent.scheduled()) - tickEvent.squash(); } template void FullO3CPU::takeOverFrom(BaseCPU *oldCPU) { - // Flush out any old data from the time buffers. - for (int i = 0; i < timeBuffer.getSize(); ++i) { - timeBuffer.advance(); - fetchQueue.advance(); - decodeQueue.advance(); - renameQueue.advance(); - iewQueue.advance(); - } - - activityRec.reset(); - BaseCPU::takeOverFrom(oldCPU); fetch.takeOverFrom(); @@ -1244,42 +1327,14 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) iew.takeOverFrom(); commit.takeOverFrom(); - assert(!tickEvent.scheduled() || tickEvent.squashed()); + assert(!tickEvent.scheduled()); FullO3CPU *oldO3CPU = dynamic_cast*>(oldCPU); if (oldO3CPU) globalSeqNum = oldO3CPU->globalSeqNum; - // @todo: Figure out how to properly select the tid to put onto - // the active threads list. - ThreadID tid = 0; - - list::iterator isActive = - std::find(activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the delay - //needed for thread to activate - DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", - tid); - - activeThreads.push_back(tid); - } - - // Set all statuses to active, schedule the CPU's tick event. - // @todo: Fix up statuses so this is handled properly - ThreadID size = threadContexts.size(); - for (ThreadID i = 0; i < size; ++i) { - ThreadContext *tc = threadContexts[i]; - if (tc->status() == ThreadContext::Active && _status != Running) { - _status = Running; - reschedule(tickEvent, nextCycle(), true); - } - } - if (!tickEvent.scheduled()) - schedule(tickEvent, nextCycle()); - lastRunningCycle = curCycle(); + _status = Idle; } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 890598b0f..24c4b46a8 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -333,6 +333,33 @@ class FullO3CPU : public BaseO3CPU /** The tick event used for scheduling CPU ticks. */ DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + /** + * Check if the pipeline has drained and signal the DrainManager. + * + * This method checks if a drain has been requested and if the CPU + * has drained successfully (i.e., there are no instructions in + * the pipeline). If the CPU has drained, it deschedules the tick + * event and signals the drain manager. + * + * @return False if a drain hasn't been requested or the CPU + * hasn't drained, true otherwise. + */ + bool tryDrain(); + + /** + * Perform sanity checks after a drain. + * + * This method is called from drain() when it has determined that + * the CPU is fully drained when gem5 is compiled with the NDEBUG + * macro undefined. The intention of this method is to do more + * extensive tests than the isDrained() method to weed out any + * draining bugs. + */ + void drainSanityCheck() const; + + /** Check if a system is in a drained state. */ + bool isDrained() const; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(DerivO3CPUParams *params); @@ -416,6 +443,9 @@ class FullO3CPU : public BaseO3CPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Is the CPU draining? */ + bool isDraining() const { return getDrainState() == Drainable::Draining; } + /** Serialize state. */ virtual void serialize(std::ostream &os); @@ -435,8 +465,14 @@ class FullO3CPU : public BaseO3CPU /** Resumes execution after a drain. */ void drainResume(); - /** Signals to this CPU that a stage has completed switching out. */ - void signalDrained(); + /** + * Commit has reached a safe point to drain a thread. + * + * Commit calls this method to inform the pipeline that it has + * reached a point where it is not executed microcode and is about + * to squash uncommitted instructions to fully drain the pipeline. + */ + void commitDrained(ThreadID tid); /** Switches out this CPU. */ virtual void switchOut(); @@ -732,9 +768,6 @@ class FullO3CPU : public BaseO3CPU /** DrainManager to notify when draining has completed. */ DrainManager *drainManager; - /** Counter of how many stages have completed draining. */ - int drainCount; - /** Pointers to all of the threads in the CPU. */ std::vector thread; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 817c5263f..3424b1d07 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -89,6 +101,9 @@ class DefaultDecode /** DefaultDecode constructor. */ DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params); + void startupStage(); + void resetStage(); + /** Returns the name of decode. */ std::string name() const; @@ -107,17 +122,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list *at_ptr); - /** Drains the decode stage. */ - bool drain(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; - /** Resumes execution after a drain. */ - void resume() { } - - /** Switches out the decode stage. */ - void switchOut() { } + /** Has the stage drained? */ + bool isDrained() const { return true; } /** Takes over from another CPU's thread. */ - void takeOverFrom(); + void takeOverFrom() { resetStage(); } /** Ticks decode, processing all input signals and decoding as many * instructions as possible. @@ -268,9 +280,6 @@ class DefaultDecode /** List of active thread ids */ std::list *activeThreads; - /** Number of branches in flight. */ - unsigned branchCount[Impl::MaxThreads]; - /** Maximum size of the skid buffer. */ unsigned skidBufferMax; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 315d53155..cd226017f 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -1,4 +1,16 @@ -/* +/* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -51,6 +63,21 @@ DefaultDecode::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params) fetchToDecodeDelay(params->fetchToDecodeDelay), decodeWidth(params->decodeWidth), numThreads(params->numThreads) +{ + // @todo: Make into a parameter + skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth; +} + +template +void +DefaultDecode::startupStage() +{ + resetStage(); +} + +template +void +DefaultDecode::resetStage() { _status = Inactive; @@ -62,9 +89,6 @@ DefaultDecode::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params) stalls[tid].iew = false; stalls[tid].commit = false; } - - // @todo: Make into a parameter - skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth; } template @@ -163,35 +187,14 @@ DefaultDecode::setActiveThreads(std::list *at_ptr) activeThreads = at_ptr; } -template -bool -DefaultDecode::drain() -{ - // Decode is done draining at any time. - cpu->signalDrained(); - return true; -} - template void -DefaultDecode::takeOverFrom() +DefaultDecode::drainSanityCheck() const { - _status = Inactive; - - // Be sure to reset state and clear out any old instructions. for (ThreadID tid = 0; tid < numThreads; ++tid) { - decodeStatus[tid] = Idle; - - stalls[tid].rename = false; - stalls[tid].iew = false; - stalls[tid].commit = false; - while (!insts[tid].empty()) - insts[tid].pop(); - while (!skidBuffer[tid].empty()) - skidBuffer[tid].pop(); - branchCount[tid] = 0; + assert(insts[tid].empty()); + assert(skidBuffer[tid].empty()); } - wroteToTimeBuffer = false; } template diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh index 804b3f9cd..41ab6e94f 100644 --- a/src/cpu/o3/dep_graph.hh +++ b/src/cpu/o3/dep_graph.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * @@ -93,8 +105,11 @@ class DependencyGraph /** Removes and returns the newest dependent of a specific register. */ DynInstPtr pop(PhysRegIndex idx); + /** Checks if the entire dependency graph is empty. */ + bool empty() const; + /** Checks if there are any dependents on a specific register. */ - bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + bool empty(PhysRegIndex idx) const { return !dependGraph[idx].next; } /** Debugging function to dump out the dependency graph. */ @@ -240,6 +255,17 @@ DependencyGraph::pop(PhysRegIndex idx) return inst; } +template +bool +DependencyGraph::empty() const +{ + for (int i = 0; i < numEntries; ++i) { + if (!empty(i)) + return false; + } + return true; +} + template void DependencyGraph::dump() diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 702a45e15..fb17a9247 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 ARM Limited + * Copyright (c) 2010-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -165,7 +165,6 @@ class DefaultFetch Fetching, TrapPending, QuiescePending, - SwitchOut, ItlbWait, IcacheWaitResponse, IcacheWaitRetry, @@ -226,25 +225,36 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the drain of the fetch stage. */ - bool drain(); + /** Resume after a drain. */ + void drainResume(); - /** Resumes execution after a drain. */ - void resume(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; - /** Tells fetch stage to prepare to be switched out. */ - void switchOut(); + /** Has the stage drained? */ + bool isDrained() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); - /** Checks if the fetch stage is switched out. */ - bool isSwitchedOut() { return switchedOut; } + /** + * Stall the fetch stage after reaching a safe drain point. + * + * The CPU uses this method to stop fetching instructions from a + * thread that has been drained. The drain stall is different from + * all other stalls in that it is signaled instantly from the + * commit stage (without the normal communication delay) when it + * has reached a safe point to drain from. + */ + void drainStall(ThreadID tid); /** Tells fetch to wake up from a quiesce instruction. */ void wakeFromQuiesce(); private: + /** Reset this pipeline stage */ + void resetStage(); + /** Changes the status of this stage to active, and indicates this * to the CPU. */ @@ -423,6 +433,7 @@ class DefaultFetch bool rename; bool iew; bool commit; + bool drain; }; /** Tracks which stages are telling fetch to stall. */ @@ -490,12 +501,6 @@ class DefaultFetch */ bool interruptPending; - /** Is there a drain pending. */ - bool drainPending; - - /** Records if fetch is switched out. */ - bool switchedOut; - /** Set to true if a pipelined I-cache request should be issued. */ bool issuePipelinedIfetch[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 87d2bc593..f531203d9 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 ARM Limited + * Copyright (c) 2010-2012 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ #include "cpu/o3/fetch.hh" #include "cpu/exetrace.hh" #include "debug/Activity.hh" +#include "debug/Drain.hh" #include "debug/Fetch.hh" #include "mem/packet.hh" #include "params/DerivO3CPU.hh" @@ -73,20 +74,15 @@ template DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) : cpu(_cpu), branchPred(params), - numInst(0), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), commitToFetchDelay(params->commitToFetchDelay), fetchWidth(params->fetchWidth), - cacheBlocked(false), retryPkt(NULL), retryTid(InvalidThreadID), numThreads(params->numThreads), numFetchingThreads(params->smtNumFetchingThreads), - interruptPending(false), - drainPending(false), - switchedOut(false), finishTranslationEvent(this) { if (numThreads > Impl::MaxThreads) @@ -98,9 +94,6 @@ DefaultFetch::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params) "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", fetchWidth, static_cast(Impl::MaxWidth)); - // Set fetch stage's status to inactive. - _status = Inactive; - std::string policy = params->smtFetchPolicy; // Convert string to lowercase @@ -304,34 +297,52 @@ template void DefaultFetch::startupStage() { + assert(priorityList.empty()); + resetStage(); + + // Fetch needs to start fetching instructions at the very beginning, + // so it must start up in active state. + switchToActive(); +} + +template +void +DefaultFetch::resetStage() +{ + numInst = 0; + interruptPending = false; + cacheBlocked = false; + + priorityList.clear(); + // Setup PC and nextPC with initial state. for (ThreadID tid = 0; tid < numThreads; tid++) { + fetchStatus[tid] = Running; pc[tid] = cpu->pcState(tid); fetchOffset[tid] = 0; macroop[tid] = NULL; + delayedCommit[tid] = false; - } - - for (ThreadID tid = 0; tid < numThreads; tid++) { - - fetchStatus[tid] = Running; - - priorityList.push_back(tid); - memReq[tid] = NULL; stalls[tid].decode = false; stalls[tid].rename = false; stalls[tid].iew = false; stalls[tid].commit = false; + stalls[tid].drain = false; + + priorityList.push_back(tid); } - // Schedule fetch to get the correct PC from the CPU - // scheduleFetchStartupEvent(1); + wroteToTimeBuffer = false; + _status = Inactive; - // Fetch needs to start fetching instructions at the very beginning, - // so it must start up in active state. - switchToActive(); + // this CPU could still be unconnected if we are restoring from a + // checkpoint and this CPU is to be switched in, thus we can only + // do this here if the instruction port is actually connected, if + // not we have to do it as part of takeOverFrom. + if (cpu->getInstPort().isConnected()) + setIcache(); } template @@ -362,12 +373,12 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) ThreadID tid = pkt->req->threadId(); DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); + assert(!cpu->switchedOut()); // Only change the status if it's still waiting on the icache access // to return. if (fetchStatus[tid] != IcacheWaitResponse || - pkt->req != memReq[tid] || - isSwitchedOut()) { + pkt->req != memReq[tid]) { ++fetchIcacheSquashes; delete pkt->req; delete pkt; @@ -377,16 +388,14 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize); cacheDataValid[tid] = true; - if (!drainPending) { - // Wake up the CPU (if it went to sleep and was waiting on - // this completion event). - cpu->wakeCPU(); + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); - switchToActive(); - } + switchToActive(); // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -401,57 +410,79 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) memReq[tid] = NULL; } +template +void +DefaultFetch::drainResume() +{ + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) + stalls[i].drain = false; +} + +template +void +DefaultFetch::drainSanityCheck() const +{ + assert(isDrained()); + assert(retryPkt == NULL); + assert(retryTid == InvalidThreadID); + assert(cacheBlocked == false); + assert(interruptPending == false); + + for (ThreadID i = 0; i < numThreads; ++i) { + assert(!memReq[i]); + assert(!stalls[i].decode); + assert(!stalls[i].rename); + assert(!stalls[i].iew); + assert(!stalls[i].commit); + assert(fetchStatus[i] == Idle || stalls[i].drain); + } + + branchPred.drainSanityCheck(); +} + template bool -DefaultFetch::drain() +DefaultFetch::isDrained() const { - // Fetch is ready to drain at any time. - cpu->signalDrained(); - drainPending = true; - return true; -} + /* Make sure that threads are either idle of that the commit stage + * has signaled that draining has completed by setting the drain + * stall flag. This effectively forces the pipeline to be disabled + * until the whole system is drained (simulation may continue to + * drain other components). + */ + for (ThreadID i = 0; i < numThreads; ++i) { + if (!(fetchStatus[i] == Idle || + (fetchStatus[i] == Blocked && stalls[i].drain))) + return false; + } -template -void -DefaultFetch::resume() -{ - drainPending = false; -} - -template -void -DefaultFetch::switchOut() -{ - switchedOut = true; - // Branch predictor needs to have its state cleared. - branchPred.switchOut(); + /* The pipeline might start up again in the middle of the drain + * cycle if the finish translation event is scheduled, so make + * sure that's not the case. + */ + return !finishTranslationEvent.scheduled(); } template void DefaultFetch::takeOverFrom() { - // the instruction port is now connected so we can get the block - // size - setIcache(); + assert(cpu->getInstPort().isConnected()); + resetStage(); - // Reset all state - for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { - stalls[i].decode = 0; - stalls[i].rename = 0; - stalls[i].iew = 0; - stalls[i].commit = 0; - pc[i] = cpu->pcState(i); - fetchStatus[i] = Running; - } - numInst = 0; - wroteToTimeBuffer = false; - _status = Inactive; - switchedOut = false; - interruptPending = false; branchPred.takeOverFrom(); } +template +void +DefaultFetch::drainStall(ThreadID tid) +{ + assert(cpu->isDraining()); + assert(!stalls[tid].drain); + DPRINTF(Drain, "%i: Thread drained.\n", tid); + stalls[tid].drain = true; +} + template void DefaultFetch::wakeFromQuiesce() @@ -536,16 +567,14 @@ DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) { Fault fault = NoFault; + assert(!cpu->switchedOut()); + // @todo: not sure if these should block translation. //AlphaDep if (cacheBlocked) { DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", tid); return false; - } else if (isSwitchedOut()) { - DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n", - tid); - return false; } else if (checkInterrupt(pc) && !delayedCommit[tid]) { // Hold off fetch from getting new instructions when: // Cache is blocked, or @@ -586,11 +615,13 @@ DefaultFetch::finishTranslation(Fault fault, RequestPtr mem_req) ThreadID tid = mem_req->threadId(); Addr block_PC = mem_req->getVaddr(); + assert(!cpu->switchedOut()); + // Wake up CPU if it was idle cpu->wakeCPU(); if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || - mem_req->getVaddr() != memReq[tid]->getVaddr() || isSwitchedOut()) { + mem_req->getVaddr() != memReq[tid]->getVaddr()) { DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", tid); ++fetchTlbSquashes; @@ -757,6 +788,10 @@ DefaultFetch::checkStall(ThreadID tid) const if (cpu->contextSwitch) { DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid); ret_val = true; + } else if (stalls[tid].drain) { + assert(cpu->isDraining()); + DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid); + ret_val = true; } else if (stalls[tid].decode) { DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid); ret_val = true; @@ -1097,7 +1132,9 @@ DefaultFetch::fetch(bool &status_change) ////////////////////////////////////////// ThreadID tid = getFetchingThread(fetchPolicy); - if (tid == InvalidThreadID || drainPending) { + assert(!cpu->switchedOut()); + + if (tid == InvalidThreadID) { // Breaks looping condition in tick() threadFetched = numFetchingThreads; @@ -1147,8 +1184,7 @@ DefaultFetch::fetch(bool &status_change) else ++fetchMiscStallCycles; return; - } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid]) - || isSwitchedOut()) { + } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { // Stall CPU if an interrupt is posted and we're not issuing // an delayed commit micro-op currently (delayed commit instructions // are not interruptable by interrupts, only faults) @@ -1566,7 +1602,7 @@ DefaultFetch::profileStall(ThreadID tid) { // @todo Per-thread stats - if (drainPending) { + if (stalls[tid].drain) { ++fetchPendingDrainCycles; DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); } else if (activeThreads->empty()) { diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index ecbd79ee7..c0db5cbfc 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * @@ -247,17 +259,11 @@ FUPool::dump() } void -FUPool::switchOut() +FUPool::drainSanityCheck() const { -} - -void -FUPool::takeOver() -{ - for (int i = 0; i < numFU; i++) { - unitBusy[i] = false; - } - unitsToBeFreed.clear(); + assert(unitsToBeFreed.empty()); + for (int i = 0; i < numFU; i++) + assert(!unitBusy[i]); } // diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh index fbdc1d89a..85912af3a 100644 --- a/src/cpu/o3/fu_pool.hh +++ b/src/cpu/o3/fu_pool.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * @@ -157,11 +169,11 @@ class FUPool : public SimObject return maxIssueLatencies[capability]; } - /** Switches out functional unit pool. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; /** Takes over from another CPU's thread. */ - void takeOver(); + void takeOverFrom() {}; }; #endif // __CPU_O3_FU_POOL_HH__ diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index dcc8ecf82..5adf32752 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -150,21 +150,15 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Drains IEW stage. */ - bool drain(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; - /** Resumes execution after a drain. */ - void resume(); - - /** Completes switch out of IEW stage. */ - void switchOut(); + /** Has the stage drained? */ + bool isDrained() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); - /** Returns if IEW is switched out. */ - bool isSwitchedOut() { return switchedOut; } - /** Squashes instructions in IEW for a specific thread. */ void squash(ThreadID tid); @@ -470,9 +464,6 @@ class DefaultIEW /** Maximum size of the skid buffer. */ unsigned skidBufferMax; - /** Is this stage switched out. */ - bool switchedOut; - /** Stat for total number of idle cycles. */ Stats::Scalar iewIdleCycles; /** Stat for total number of squashing cycles. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index e25c8829b..4b4f66a1f 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 ARM Limited + * Copyright (c) 2010-2012 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -54,6 +54,7 @@ #include "cpu/timebuf.hh" #include "debug/Activity.hh" #include "debug/Decode.hh" +#include "debug/Drain.hh" #include "debug/IEW.hh" #include "params/DerivO3CPU.hh" @@ -73,8 +74,7 @@ DefaultIEW::DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params) issueWidth(params->issueWidth), wbOutstanding(0), wbWidth(params->wbWidth), - numThreads(params->numThreads), - switchedOut(false) + numThreads(params->numThreads) { _status = Active; exeStatus = Running; @@ -360,38 +360,33 @@ DefaultIEW::setScoreboard(Scoreboard *sb_ptr) template bool -DefaultIEW::drain() +DefaultIEW::isDrained() const { - // IEW is ready to drain at any time. - cpu->signalDrained(); - return true; -} - -template -void -DefaultIEW::resume() -{ -} - -template -void -DefaultIEW::switchOut() -{ - // Clear any state. - switchedOut = true; - assert(insts[0].empty()); - assert(skidBuffer[0].empty()); - - instQueue.switchOut(); - ldstQueue.switchOut(); - fuPool->switchOut(); + bool drained(ldstQueue.isDrained()); for (ThreadID tid = 0; tid < numThreads; tid++) { - while (!insts[tid].empty()) - insts[tid].pop(); - while (!skidBuffer[tid].empty()) - skidBuffer[tid].pop(); + if (!insts[tid].empty()) { + DPRINTF(Drain, "%i: Insts not empty.\n", tid); + drained = false; + } + if (!skidBuffer[tid].empty()) { + DPRINTF(Drain, "%i: Skid buffer not empty.\n", tid); + drained = false; + } } + + return drained; +} + +template +void +DefaultIEW::drainSanityCheck() const +{ + assert(isDrained()); + + instQueue.drainSanityCheck(); + ldstQueue.drainSanityCheck(); + fuPool->drainSanityCheck(); } template @@ -402,11 +397,10 @@ DefaultIEW::takeOverFrom() _status = Active; exeStatus = Running; wbStatus = Idle; - switchedOut = false; instQueue.takeOverFrom(); ldstQueue.takeOverFrom(); - fuPool->takeOver(); + fuPool->takeOverFrom(); startupStage(); cpu->activityThisCycle(); diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 42a244c0a..15190970d 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -144,15 +144,12 @@ class InstructionQueue /** Sets the global time buffer. */ void setTimeBuffer(TimeBuffer *tb_ptr); - /** Switches out the instruction queue. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; /** Takes over execution from another CPU's thread. */ void takeOverFrom(); - /** Returns if the IQ is switched out. */ - bool isSwitchedOut() { return switchedOut; } - /** Number of entries needed for given amount of threads. */ int entryAmount(ThreadID num_threads); @@ -428,9 +425,6 @@ class InstructionQueue */ Cycles commitToIEWDelay; - /** Is the IQ switched out. */ - bool switchedOut; - /** The sequence number of the squashed instruction. */ InstSeqNum squashedSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 785f86676..3e3325beb 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -93,8 +93,6 @@ InstructionQueue::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, { assert(fuPool); - switchedOut = false; - numThreads = params->numThreads; // Set the number of physical registers as the number of int + float @@ -439,29 +437,19 @@ InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) template void -InstructionQueue::switchOut() +InstructionQueue::drainSanityCheck() const { -/* - if (!instList[0].empty() || (numEntries != freeEntries) || - !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) { - dumpInsts(); -// assert(0); - } -*/ - resetState(); - dependGraph.reset(); - instsToExecute.clear(); - switchedOut = true; - for (ThreadID tid = 0; tid < numThreads; ++tid) { - memDepUnit[tid].switchOut(); - } + assert(dependGraph.empty()); + assert(instsToExecute.empty()); + for (ThreadID tid = 0; tid < numThreads; ++tid) + memDepUnit[tid].drainSanityCheck(); } template void InstructionQueue::takeOverFrom() { - switchedOut = false; + resetState(); } template @@ -716,14 +704,9 @@ void InstructionQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) { DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum); + assert(!cpu->switchedOut()); // The CPU could have been sleeping until this op completed (*extremely* // long latency op). Wake it if it was. This may be overkill. - if (isSwitchedOut()) { - DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n", - inst->seqNum); - return; - } - iewStage->wakeCPU(); if (fu_idx > -1) diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 7caee86f6..6857a6aca 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 ARM Limited + * Copyright (c) 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -79,8 +79,11 @@ class LSQ { /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list *at_ptr); - /** Switches out the LSQ. */ - void switchOut(); + + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; + /** Has the LSQ drained? */ + bool isDrained() const; /** Takes over execution from another CPU's thread. */ void takeOverFrom(); @@ -211,6 +214,13 @@ class LSQ { */ bool isFull(ThreadID tid); + /** Returns if the LSQ is empty (both LQ and SQ are empty). */ + bool isEmpty() const; + /** Returns if all of the LQs are empty. */ + bool lqEmpty() const; + /** Returns if all of the SQs are empty. */ + bool sqEmpty() const; + /** Returns if any of the LQs are full. */ bool lqFull(); /** Returns if the LQ of a given thread is full. */ @@ -254,7 +264,7 @@ class LSQ { { return thread[tid].willWB(); } /** Returns if the cache is currently blocked. */ - bool cacheBlocked() + bool cacheBlocked() const { return retryTid != InvalidThreadID; } /** Sets the retry thread id, indicating that one of the LSQUnits diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 7051c9f7c..c796d7078 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -45,6 +45,7 @@ #include #include "cpu/o3/lsq.hh" +#include "debug/Drain.hh" #include "debug/Fetch.hh" #include "debug/LSQ.hh" #include "debug/Writeback.hh" @@ -143,11 +144,36 @@ LSQ::setActiveThreads(list *at_ptr) template void -LSQ::switchOut() +LSQ::drainSanityCheck() const { - for (ThreadID tid = 0; tid < numThreads; tid++) { - thread[tid].switchOut(); + assert(isDrained()); + + for (ThreadID tid = 0; tid < numThreads; tid++) + thread[tid].drainSanityCheck(); +} + +template +bool +LSQ::isDrained() const +{ + bool drained(true); + + if (!lqEmpty()) { + DPRINTF(Drain, "Not drained, LQ not empty.\n"); + drained = false; } + + if (!sqEmpty()) { + DPRINTF(Drain, "Not drained, SQ not empty.\n"); + drained = false; + } + + if (retryTid != InvalidThreadID) { + DPRINTF(Drain, "Not drained, the LSQ has blocked the caches.\n"); + drained = false; + } + + return drained; } template @@ -456,6 +482,47 @@ LSQ::isFull(ThreadID tid) return thread[tid].lqFull() || thread[tid].sqFull(); } +template +bool +LSQ::isEmpty() const +{ + return lqEmpty() && sqEmpty(); +} + +template +bool +LSQ::lqEmpty() const +{ + list::const_iterator threads = activeThreads->begin(); + list::const_iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].lqEmpty()) + return false; + } + + return true; +} + +template +bool +LSQ::sqEmpty() const +{ + list::const_iterator threads = activeThreads->begin(); + list::const_iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].sqEmpty()) + return false; + } + + return true; +} + template bool LSQ::lqFull() diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 2c79931e2..5b8e02fc6 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -91,15 +103,12 @@ class LSQUnit { /** Sets the pointer to the dcache port. */ void setDcachePort(MasterPort *dcache_port); - /** Switches out LSQ unit. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); - /** Returns if the LSQ is switched out. */ - bool isSwitchedOut() { return switchedOut; } - /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. * @todo: Move the number of used ports up to the LSQ level so it can @@ -201,12 +210,21 @@ class LSQUnit { /** Returns if either the LQ or SQ is full. */ bool isFull() { return lqFull() || sqFull(); } + /** Returns if both the LQ and SQ are empty. */ + bool isEmpty() const { return lqEmpty() && sqEmpty(); } + /** Returns if the LQ is full. */ bool lqFull() { return loads >= (LQEntries - 1); } /** Returns if the SQ is full. */ bool sqFull() { return stores >= (SQEntries - 1); } + /** Returns if the LQ is empty. */ + bool lqEmpty() const { return loads == 0; } + + /** Returns if the SQ is empty. */ + bool sqEmpty() const { return stores == 0; } + /** Returns the number of instructions in the LSQ. */ unsigned getCount() { return loads + stores; } @@ -225,6 +243,9 @@ class LSQUnit { void recvRetry(); private: + /** Reset the LSQ state */ + void resetState(); + /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -420,9 +441,6 @@ class LSQUnit { /** The number of used cache ports in this cycle. */ int usedPorts; - /** Is the LSQ switched out. */ - bool switchedOut; - //list mshrSeqNums; /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */ diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index d640f94a3..a4cb56767 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -66,9 +66,9 @@ template void LSQUnit::WritebackEvent::process() { - if (!lsqPtr->isSwitchedOut()) { - lsqPtr->writeback(inst, pkt); - } + assert(!lsqPtr->cpu->switchedOut()); + + lsqPtr->writeback(inst, pkt); if (pkt->senderState) delete pkt->senderState; @@ -102,7 +102,8 @@ LSQUnit::completeDataAccess(PacketPtr pkt) return; } - if (isSwitchedOut() || inst->isSquashed()) { + assert(!cpu->switchedOut()); + if (inst->isSquashed()) { iewStage->decrWb(inst->seqNum); } else { if (!state->noWB) { @@ -147,10 +148,6 @@ LSQUnit::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); - switchedOut = false; - - cacheBlockMask = 0; - lsq = lsq_ptr; lsqID = id; @@ -164,19 +161,35 @@ LSQUnit::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, depCheckShift = params->LSQDepCheckShift; checkLoads = params->LSQCheckLoads; + cachePorts = params->cachePorts; + needsTSO = params->needsTSO; + + resetState(); +} + + +template +void +LSQUnit::resetState() +{ + loads = stores = storesToWB = 0; loadHead = loadTail = 0; storeHead = storeWBIdx = storeTail = 0; usedPorts = 0; - cachePorts = params->cachePorts; retryPkt = NULL; memDepViolator = NULL; blockedLoadSeqNum = 0; - needsTSO = params->needsTSO; + + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + + cacheBlockMask = 0; } template @@ -258,40 +271,20 @@ LSQUnit::clearSQ() template void -LSQUnit::switchOut() +LSQUnit::drainSanityCheck() const { - switchedOut = true; - for (int i = 0; i < loadQueue.size(); ++i) { + for (int i = 0; i < loadQueue.size(); ++i) assert(!loadQueue[i]); - loadQueue[i] = NULL; - } assert(storesToWB == 0); + assert(!retryPkt); } template void LSQUnit::takeOverFrom() { - switchedOut = false; - loads = stores = storesToWB = 0; - - loadHead = loadTail = 0; - - storeHead = storeWBIdx = storeTail = 0; - - usedPorts = 0; - - memDepViolator = NULL; - - blockedLoadSeqNum = 0; - - stalled = false; - isLoadBlocked = false; - loadBlockedHandled = false; - - // Just incase the memory system changed out from under us - cacheBlockMask = 0; + resetState(); } template diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh index ce5a62ef8..989d36ea1 100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -92,8 +104,8 @@ class MemDepUnit /** Registers statistics. */ void regStats(); - /** Switches out the memory dependence predictor. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index d30dcbd3d..7fbea0216 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -114,17 +126,14 @@ MemDepUnit::regStats() template void -MemDepUnit::switchOut() +MemDepUnit::drainSanityCheck() const { - assert(instList[0].empty()); assert(instsToReplay.empty()); assert(memDepHash.empty()); - // Clear any state. - for (int i = 0; i < Impl::MaxThreads; ++i) { - instList[i].clear(); - } - instsToReplay.clear(); - memDepHash.clear(); + for (int i = 0; i < Impl::MaxThreads; ++i) + assert(instList[i].empty()); + assert(instsToReplay.empty()); + assert(memDepHash.empty()); } template diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 0aa238c06..606c3365e 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -157,14 +169,11 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); - /** Drains the rename stage. */ - bool drain(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; - /** Resumes execution after a drain. */ - void resume() { } - - /** Switches out the rename stage. */ - void switchOut(); + /** Has the stage drained? */ + bool isDrained() const; /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -181,6 +190,9 @@ class DefaultRename void dumpHistory(); private: + /** Reset this pipeline stage */ + void resetStage(); + /** Determines what to do based on rename's current status. * @param status_change rename() sets this variable if there was a status * change (ie switching from blocking to unblocking). diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 4996cfcad..15a4ebc13 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2012 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -61,31 +61,9 @@ DefaultRename::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params) commitToRenameDelay(params->commitToRenameDelay), renameWidth(params->renameWidth), commitWidth(params->commitWidth), - resumeSerialize(false), - resumeUnblocking(false), numThreads(params->numThreads), maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs) { - _status = Inactive; - - for (ThreadID tid = 0; tid < numThreads; tid++) { - renameStatus[tid] = Idle; - - freeEntries[tid].iqEntries = 0; - freeEntries[tid].lsqEntries = 0; - freeEntries[tid].robEntries = 0; - - stalls[tid].iew = false; - stalls[tid].commit = false; - serializeInst[tid] = NULL; - - instsInProgress[tid] = 0; - - emptyROB[tid] = true; - - serializeOnNextInst[tid] = false; - } - // @todo: Make into a parameter. skidBufferMax = (2 * (decodeToRenameDelay * params->decodeWidth)) + renameWidth; } @@ -230,12 +208,34 @@ template void DefaultRename::startupStage() { + resetStage(); +} + +template +void +DefaultRename::resetStage() +{ + _status = Inactive; + + resumeSerialize = false; + resumeUnblocking = false; + // Grab the number of free entries directly from the stages. for (ThreadID tid = 0; tid < numThreads; tid++) { + renameStatus[tid] = Idle; + freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid); freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid); freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid); emptyROB[tid] = true; + + stalls[tid].iew = false; + stalls[tid].commit = false; + serializeInst[tid] = NULL; + + instsInProgress[tid] = 0; + + serializeOnNextInst[tid] = false; } } @@ -271,67 +271,34 @@ DefaultRename::setScoreboard(Scoreboard *_scoreboard) template bool -DefaultRename::drain() +DefaultRename::isDrained() const { - // Rename is ready to switch out at any time. - cpu->signalDrained(); - return true; -} - -template -void -DefaultRename::switchOut() -{ - // Clear any state, fix up the rename map. for (ThreadID tid = 0; tid < numThreads; tid++) { - typename std::list::iterator hb_it = - historyBuffer[tid].begin(); - - while (!historyBuffer[tid].empty()) { - assert(hb_it != historyBuffer[tid].end()); - - DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence " - "number %i.\n", tid, (*hb_it).instSeqNum); - - // Tell the rename map to set the architected register to the - // previous physical register that it was renamed to. - renameMap[tid]->setEntry(hb_it->archReg, hb_it->prevPhysReg); - - // Put the renamed physical register back on the free list. - freeList->addReg(hb_it->newPhysReg); - - // Be sure to mark its register as ready if it's a misc register. - if (hb_it->newPhysReg >= maxPhysicalRegs) { - scoreboard->setReg(hb_it->newPhysReg); - } - - historyBuffer[tid].erase(hb_it++); - } - insts[tid].clear(); - skidBuffer[tid].clear(); + if (instsInProgress[tid] != 0 || + !historyBuffer[tid].empty() || + !skidBuffer[tid].empty() || + !insts[tid].empty()) + return false; } + return true; } template void DefaultRename::takeOverFrom() { - _status = Inactive; - startupStage(); + resetStage(); +} - // Reset all state prior to taking over from the other CPU. +template +void +DefaultRename::drainSanityCheck() const +{ for (ThreadID tid = 0; tid < numThreads; tid++) { - renameStatus[tid] = Idle; - - stalls[tid].iew = false; - stalls[tid].commit = false; - serializeInst[tid] = NULL; - - instsInProgress[tid] = 0; - - emptyROB[tid] = true; - - serializeOnNextInst[tid] = false; + assert(historyBuffer[tid].empty()); + assert(insts[tid].empty()); + assert(skidBuffer[tid].empty()); + assert(instsInProgress[tid] == 0); } } diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index d0b156954..171781ce2 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -97,8 +109,8 @@ class ROB */ void setActiveThreads(std::list *at_ptr); - /** Switches out the ROB. */ - void switchOut(); + /** Perform sanity checks after a drain. */ + void drainSanityCheck() const; /** Takes over another CPU's thread. */ void takeOverFrom(); @@ -185,11 +197,11 @@ class ROB { return threadEntries[tid] == numEntries; } /** Returns if the ROB is empty. */ - bool isEmpty() + bool isEmpty() const { return numInstsInROB == 0; } /** Returns if a specific thread's partition is empty. */ - bool isEmpty(ThreadID tid) + bool isEmpty(ThreadID tid) const { return threadEntries[tid] == 0; } /** Executes the squash, marking squashed instructions. */ @@ -264,6 +276,9 @@ class ROB void regStats(); private: + /** Reset the ROB state */ + void resetState(); + /** Pointer to the CPU. */ O3CPU *cpu; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index ee4a9e576..5f62ce539 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -47,12 +59,6 @@ ROB::ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth, numInstsInROB(0), numThreads(_numThreads) { - for (ThreadID tid = 0; tid < numThreads; tid++) { - squashedSeqNum[tid] = 0; - doneSquashing[tid] = true; - threadEntries[tid] = 0; - } - std::string policy = _smtROBPolicy; //Convert string to lowercase @@ -95,10 +101,20 @@ ROB::ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth, "Partitioned, Threshold}"); } - // Set the per-thread iterators to the end of the instruction list. - for (ThreadID tid = 0; tid < numThreads; tid++) { + resetState(); +} + +template +void +ROB::resetState() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + doneSquashing[tid] = true; + threadEntries[tid] = 0; squashIt[tid] = instList[tid].end(); + squashedSeqNum[tid] = 0; } + numInstsInROB = 0; // Initialize the "universal" ROB head & tail point to invalid // pointers @@ -123,28 +139,18 @@ ROB::setActiveThreads(list *at_ptr) template void -ROB::switchOut() +ROB::drainSanityCheck() const { - for (ThreadID tid = 0; tid < numThreads; tid++) { - instList[tid].clear(); - } + for (ThreadID tid = 0; tid < numThreads; tid++) + assert(instList[tid].empty()); + assert(isEmpty()); } template void ROB::takeOverFrom() { - for (ThreadID tid = 0; tid < numThreads; tid++) { - doneSquashing[tid] = true; - threadEntries[tid] = 0; - squashIt[tid] = instList[tid].end(); - } - numInstsInROB = 0; - - // Initialize the "universal" ROB head & tail point to invalid - // pointers - head = instList[0].end(); - tail = instList[0].end(); + resetState(); } template