Comments and code cleanup.

cpu/activity.cc:
cpu/activity.hh:
cpu/o3/alpha_cpu.hh:
    Updates to include comments.
cpu/base_dyn_inst.cc:
    Remove call to thread->misspeculating(), as it's never actually misspeculating.

--HG--
extra : convert_revision : 86574d684770fac9b480475acca048ea418cdac3
This commit is contained in:
Kevin Lim 2006-05-31 11:45:02 -04:00
parent 94eff2f485
commit a514bf2150
36 changed files with 702 additions and 157 deletions

View file

@ -1,3 +1,30 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "base/timebuf.hh"
#include "cpu/activity.hh"
@ -14,6 +41,8 @@ ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency,
void
ActivityRecorder::activity()
{
// If we've already recorded activity for this cycle, we don't
// want to increment the count any more.
if (activityBuffer[0]) {
return;
}
@ -28,6 +57,8 @@ ActivityRecorder::activity()
void
ActivityRecorder::advance()
{
// If there's a 1 in the slot that is about to be erased once the
// time buffer advances, then decrement the activityCount.
if (activityBuffer[-longestLatency]) {
--activityCount;
@ -46,6 +77,7 @@ ActivityRecorder::advance()
void
ActivityRecorder::activateStage(const int idx)
{
// Increment the activity count if this stage wasn't already active.
if (!stageActive[idx]) {
++activityCount;
@ -62,6 +94,7 @@ ActivityRecorder::activateStage(const int idx)
void
ActivityRecorder::deactivateStage(const int idx)
{
// Decrement the activity count if this stage was active.
if (stageActive[idx]) {
--activityCount;

View file

@ -1,3 +1,30 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_ACTIVITY_HH__
#define __CPU_ACTIVITY_HH__
@ -5,33 +32,61 @@
#include "base/timebuf.hh"
#include "base/trace.hh"
/**
* ActivityRecorder helper class that informs the CPU if it can switch
* over to being idle or not. It works by having a time buffer as
* long as any time buffer in the CPU, and the CPU and all of its
* stages inform the ActivityRecorder when they write to any time
* buffer. The ActivityRecorder marks a 1 in the "0" slot of the time
* buffer any time a stage writes to a time buffer, and it advances
* its time buffer at the same time as all other stages. The
* ActivityRecorder also records if a stage has activity to do next
* cycle. The recorder keeps a count of these two. Thus any time the
* count is non-zero, there is either communication still in flight,
* or activity that still must be done, meaning that the CPU can not
* idle. If count is zero, then the CPU can safely idle as it has no
* more outstanding work to do.
*/
class ActivityRecorder {
public:
ActivityRecorder(int num_stages, int longest_latency, int count);
/** Records that there is activity this cycle. */
void activity();
/** Advances the activity buffer, decrementing the activityCount if active
* communication just left the time buffer, and descheduling the CPU if
* there is no activity.
/** Advances the activity buffer, decrementing the activityCount
* if active communication just left the time buffer, and
* determining if there is no activity.
*/
void advance();
/** Marks a stage as active. */
void activateStage(const int idx);
/** Deactivates a stage. */
void deactivateStage(const int idx);
/** Returns how many things are active within the recorder. */
int getActivityCount() { return activityCount; }
/** Sets the count to a starting value. Can be used to disable
* the idling option.
*/
void setActivityCount(int count)
{ activityCount = count; }
/** Returns if the CPU should be active. */
bool active() { return activityCount; }
/** Clears the time buffer and the activity count. */
void reset();
/** Debug function to dump the contents of the time buffer. */
void dump();
/** Debug function to ensure that the activity count matches the
* contents of the time buffer.
*/
void validate();
private:
@ -45,6 +100,7 @@ class ActivityRecorder {
*/
TimeBuffer<bool> activityBuffer;
/** Longest latency time buffer in the CPU. */
int longestLatency;
/** Tracks how many stages and cycles of time buffer have
@ -58,6 +114,7 @@ class ActivityRecorder {
*/
int activityCount;
/** Number of stages that can be marked as active or inactive. */
int numStages;
/** Records which stages are active/inactive. */

View file

@ -166,6 +166,8 @@ BaseDynInst<Impl>::~BaseDynInst()
delete traceData;
}
fault = NoFault;
--instcount;
DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n",
@ -289,7 +291,7 @@ BaseDynInst<Impl>::copy(Addr dest)
{
uint8_t data[64];
FunctionalMemory *mem = thread->mem;
assert(thread->copySrcPhysAddr || thread->misspeculating());
assert(thread->copySrcPhysAddr);
MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
req->asid = asid;

View file

@ -39,6 +39,14 @@ namespace Kernel {
class Statistics;
};
/**
* AlphaFullCPU class. Derives from the FullO3CPU class, and
* implements all ISA and implementation specific functions of the
* CPU. This is the CPU class that is used for the SimObjects, and is
* what is given to the DynInsts. Most of its state exists in the
* FullO3CPU; the state it has is mainly for ISA-specific
* functionality.
*/
template <class Impl>
class AlphaFullCPU : public FullO3CPU<Impl>
{
@ -56,145 +64,211 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Constructs an AlphaFullCPU with the given parameters. */
AlphaFullCPU(Params *params);
/**
* Derived ExecContext class for use with the AlphaFullCPU. It
* provides the interface for any external objects to access a
* single thread's state and some general CPU state. Any time
* external objects try to update state through this interface,
* the CPU will create an event to squash all in-flight
* instructions in order to ensure state is maintained correctly.
*/
class AlphaXC : public ExecContext
{
public:
/** Pointer to the CPU. */
AlphaFullCPU<Impl> *cpu;
/** Pointer to the thread state that this XC corresponds to. */
O3ThreadState<Impl> *thread;
/** Returns a pointer to this CPU. */
virtual BaseCPU *getCpuPtr() { return cpu; }
/** Sets this CPU's ID. */
virtual void setCpuId(int id) { cpu->cpu_id = id; }
/** Reads this CPU's ID. */
virtual int readCpuId() { return cpu->cpu_id; }
/** Returns a pointer to functional memory. */
virtual FunctionalMemory *getMemPtr() { return thread->mem; }
#if FULL_SYSTEM
/** Returns a pointer to the system. */
virtual System *getSystemPtr() { return cpu->system; }
/** Returns a pointer to physical memory. */
virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
/** Returns a pointer to the ITB. */
virtual AlphaITB *getITBPtr() { return cpu->itb; }
virtual AlphaDTB * getDTBPtr() { return cpu->dtb; }
/** Returns a pointer to the DTB. */
virtual AlphaDTB *getDTBPtr() { return cpu->dtb; }
/** Returns a pointer to this thread's kernel statistics. */
virtual Kernel::Statistics *getKernelStats()
{ return thread->kernelStats; }
#else
/** Returns a pointer to this thread's process. */
virtual Process *getProcessPtr() { return thread->process; }
#endif
/** Returns this thread's status. */
virtual Status status() const { return thread->status(); }
/** Sets this thread's status. */
virtual void setStatus(Status new_status)
{ thread->setStatus(new_status); }
/// Set the status to Active. Optional delay indicates number of
/// cycles to wait before beginning execution.
/** Set the status to Active. Optional delay indicates number of
* cycles to wait before beginning execution. */
virtual void activate(int delay = 1);
/// Set the status to Suspended.
/** Set the status to Suspended. */
virtual void suspend();
/// Set the status to Unallocated.
/** Set the status to Unallocated. */
virtual void deallocate();
/// Set the status to Halted.
/** Set the status to Halted. */
virtual void halt();
#if FULL_SYSTEM
/** Dumps the function profiling information.
* @todo: Implement.
*/
virtual void dumpFuncProfile();
#endif
/** Takes over execution of a thread from another CPU. */
virtual void takeOverFrom(ExecContext *old_context);
/** Registers statistics associated with this XC. */
virtual void regStats(const std::string &name);
/** Serializes state. */
virtual void serialize(std::ostream &os);
/** Unserializes state. */
virtual void unserialize(Checkpoint *cp, const std::string &section);
#if FULL_SYSTEM
/** Returns pointer to the quiesce event. */
virtual EndQuiesceEvent *getQuiesceEvent();
/** Reads the last tick that this thread was activated on. */
virtual Tick readLastActivate();
/** Reads the last tick that this thread was suspended on. */
virtual Tick readLastSuspend();
/** Clears the function profiling information. */
virtual void profileClear();
/** Samples the function profiling information. */
virtual void profileSample();
#endif
/** Returns this thread's ID number. */
virtual int getThreadNum() { return thread->tid; }
/** Returns the instruction this thread is currently committing.
* Only used when an instruction faults.
*/
virtual TheISA::MachInst getInst();
/** Copies the architectural registers from another XC into this XC. */
virtual void copyArchRegs(ExecContext *xc);
/** Resets all architectural registers to 0. */
virtual void clearArchRegs();
/** Reads an integer register. */
virtual uint64_t readIntReg(int reg_idx);
/** Reads a single precision floating point register. */
virtual float readFloatRegSingle(int reg_idx);
/** Reads a double precision floating point register. */
virtual double readFloatRegDouble(int reg_idx);
/** Reads a floating point register as an integer value. */
virtual uint64_t readFloatRegInt(int reg_idx);
/** Sets an integer register to a value. */
virtual void setIntReg(int reg_idx, uint64_t val);
/** Sets a single precision fp register to a value. */
virtual void setFloatRegSingle(int reg_idx, float val);
/** Sets a double precision fp register to a value. */
virtual void setFloatRegDouble(int reg_idx, double val);
/** Sets a fp register to an integer value. */
virtual void setFloatRegInt(int reg_idx, uint64_t val);
/** Reads this thread's PC. */
virtual uint64_t readPC()
{ return cpu->readPC(thread->tid); }
/** Sets this thread's PC. */
virtual void setPC(uint64_t val);
/** Reads this thread's next PC. */
virtual uint64_t readNextPC()
{ return cpu->readNextPC(thread->tid); }
/** Sets this thread's next PC. */
virtual void setNextPC(uint64_t val);
/** Reads a miscellaneous register. */
virtual MiscReg readMiscReg(int misc_reg)
{ return cpu->readMiscReg(misc_reg, thread->tid); }
/** Reads a misc. register, including any side-effects the
* read might have as defined by the architecture. */
virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
{ return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
/** Sets a misc. register. */
virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
/** Sets a misc. register, including any side-effects the
* write might have as defined by the architecture. */
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
/** Returns the number of consecutive store conditional failures. */
// @todo: Figure out where these store cond failures should go.
virtual unsigned readStCondFailures()
{ return thread->storeCondFailures; }
/** Sets the number of consecutive store conditional failures. */
virtual void setStCondFailures(unsigned sc_failures)
{ thread->storeCondFailures = sc_failures; }
#if FULL_SYSTEM
/** Returns if the thread is currently in PAL mode, based on
* the PC's value. */
virtual bool inPalMode()
{ return TheISA::PcPAL(cpu->readPC(thread->tid)); }
#endif
// Only really makes sense for old CPU model. Lots of code
// outside the CPU still checks this function, so it will
// always return false to keep everything working.
/** Checks if the thread is misspeculating. Because it is
* very difficult to determine if the thread is
* misspeculating, this is set as false. */
virtual bool misspeculating() { return false; }
#if !FULL_SYSTEM
/** Gets a syscall argument by index. */
virtual IntReg getSyscallArg(int i);
/** Sets a syscall argument. */
virtual void setSyscallArg(int i, IntReg val);
/** Sets the syscall return value. */
virtual void setSyscallReturn(SyscallReturn return_value);
/** Executes a syscall in SE mode. */
virtual void syscall() { return cpu->syscall(thread->tid); }
/** Reads the funcExeInst counter. */
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
#endif
};
@ -260,19 +334,32 @@ class AlphaFullCPU : public FullO3CPU<Impl>
}
#endif
/** Reads a miscellaneous register. */
MiscReg readMiscReg(int misc_reg, unsigned tid);
/** Reads a misc. register, including any side effects the read
* might have as defined by the architecture.
*/
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
/** Sets a miscellaneous register. */
Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
/** Sets a misc. register, including any side effects the write
* might have as defined by the architecture.
*/
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
/** Initiates a squash of all in-flight instructions for a given
* thread. The source of the squash is an external update of
* state through the XC.
*/
void squashFromXC(unsigned tid);
#if FULL_SYSTEM
/** Posts an interrupt. */
void post_interrupt(int int_num, int index);
/** Reads the interrupt flag. */
int readIntrFlag();
/** Sets the interrupt flags. */
void setIntrFlag(int val);
@ -298,7 +385,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Executes a syscall.
* @todo: Determine if this needs to be virtual.
*/
void syscall(int thread_num);
void syscall(int tid);
/** Gets a syscall argument. */
IntReg getSyscallArg(int i, int tid);
@ -424,6 +511,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
Addr lockAddr;
/** Temporary fix for the lock flag, works in the UP case. */
bool lockFlag;
};

View file

@ -59,10 +59,12 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
{
DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
// Setup any thread state.
this->thread.resize(this->numThreads);
for (int i = 0; i < this->numThreads; ++i) {
#if FULL_SYSTEM
// SMT is not supported in FS mode yet.
assert(this->numThreads == 1);
this->thread[i] = new Thread(this, 0, params->mem);
this->thread[i]->setStatus(ExecContext::Suspended);
@ -87,29 +89,34 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
}
#endif // !FULL_SYSTEM
this->thread[i]->numInst = 0;
ExecContext *xc_proxy;
AlphaXC *alpha_xc_proxy = new AlphaXC;
// Setup the XC that will serve as the interface to the threads/CPU.
AlphaXC *alpha_xc = new AlphaXC;
// If we're using a checker, then the XC should be the
// CheckerExecContext.
if (params->checker) {
xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker);
xc_proxy = new CheckerExecContext<AlphaXC>(
alpha_xc, this->checker);
} else {
xc_proxy = alpha_xc_proxy;
xc_proxy = alpha_xc;
}
alpha_xc_proxy->cpu = this;
alpha_xc_proxy->thread = this->thread[i];
alpha_xc->cpu = this;
alpha_xc->thread = this->thread[i];
#if FULL_SYSTEM
// Setup quiesce event.
this->thread[i]->quiesceEvent =
new EndQuiesceEvent(xc_proxy);
this->thread[i]->lastActivate = 0;
this->thread[i]->lastSuspend = 0;
#endif
// Give the thread the XC.
this->thread[i]->xcProxy = xc_proxy;
// Add the XC to the CPU's list of XC's.
this->execContexts.push_back(xc_proxy);
}
@ -171,6 +178,7 @@ AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
setStatus(old_context->status());
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
#if !FULL_SYSTEM
thread->funcExeInst = old_context->readFuncExeInst();
#else
@ -394,7 +402,6 @@ template <class Impl>
uint64_t
AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
{
DPRINTF(Fault, "Reading int register through the XC!\n");
return cpu->readArchIntReg(reg_idx, thread->tid);
}
@ -402,7 +409,6 @@ template <class Impl>
float
AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
}
@ -410,7 +416,6 @@ template <class Impl>
double
AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
}
@ -418,7 +423,6 @@ template <class Impl>
uint64_t
AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
{
DPRINTF(Fault, "Reading floatint register through the XC!\n");
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
}
@ -426,9 +430,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
{
DPRINTF(Fault, "Setting int register through the XC!\n");
cpu->setArchIntReg(reg_idx, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -438,9 +442,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -450,9 +454,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -462,9 +466,9 @@ template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
{
DPRINTF(Fault, "Setting floatint register through the XC!\n");
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -476,6 +480,7 @@ AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val)
{
cpu->setPC(val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -487,6 +492,7 @@ AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val)
{
cpu->setNextPC(val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -496,10 +502,9 @@ template <class Impl>
Fault
AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
{
DPRINTF(Fault, "Setting misc register through the XC!\n");
Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -509,12 +514,12 @@ AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
template <class Impl>
Fault
AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg,
const MiscReg &val)
{
DPRINTF(Fault, "Setting misc register through the XC!\n");
Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
// Squash if we're not already in a state update mode.
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
}
@ -595,7 +600,6 @@ AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
if (this->thread[0]->status() == ExecContext::Suspended) {
DPRINTF(IPI,"Suspended Processor awoke\n");
// xcProxies[0]->activate();
this->execContexts[0]->activate();
}
}
@ -658,6 +662,7 @@ template <class Impl>
void
AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
{
// Pass the thread's XC (the entry for tid in execContexts) into the
// fault's invoke method.
fault->invoke(this->execContexts[tid]);
}
@ -708,6 +713,7 @@ AlphaFullCPU<Impl>::processInterrupts()
if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
this->setMiscReg(IPR_ISR, summary, 0);
this->setMiscReg(IPR_INTID, ipl, 0);
// Checker needs to know these two registers were updated.
if (this->checker) {
this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);

View file

@ -86,23 +86,31 @@ class AlphaDynInst : public BaseDynInst<Impl>
void initVars();
public:
/** Reads a miscellaneous register by forwarding the request to the
* CPU, using this instruction's thread number.
* @param misc_reg Index of the misc. register to read.
* @return The value returned by the CPU's readMiscReg().
*/
MiscReg readMiscReg(int misc_reg)
{
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
}
/** Reads a misc. register, including any side-effects the read
* might have as defined by the architecture. The request is
* forwarded to the CPU using this instruction's thread number.
* @param misc_reg Index of the misc. register to read.
* @param fault Passed by reference to the CPU's
* readMiscRegWithEffect(); presumably set if the read faults
* (TODO confirm against the CPU implementation).
* @return The value returned by the CPU's readMiscRegWithEffect().
*/
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
{
return this->cpu->readMiscRegWithEffect(misc_reg, fault,
this->threadNumber);
}
/** Sets a misc. register. The value is first recorded in this
* instruction's instResult.integer, and the write is then forwarded
* to the CPU using this instruction's thread number.
* @param misc_reg Index of the misc. register to write.
* @param val Value to write.
* @return The Fault returned by the CPU's setMiscReg().
*/
Fault setMiscReg(int misc_reg, const MiscReg &val)
{
this->instResult.integer = val;
return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
}
/** Sets a misc. register, including any side-effects the write
* might have as defined by the architecture.
*/
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
{
return this->cpu->setMiscRegWithEffect(misc_reg, val,

View file

@ -64,9 +64,10 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::execute()
{
// @todo: Pretty convoluted way to avoid squashing from happening when using
// the XC during an instruction's execution (specifically for instructions
// that have sideeffects that use the XC). Fix this.
// @todo: Pretty convoluted way to avoid squashing from happening
// when using the XC during an instruction's execution
// (specifically for instructions that have side-effects that use
// the XC). Fix this.
bool in_syscall = this->thread->inSyscall;
this->thread->inSyscall = true;
@ -81,9 +82,10 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::initiateAcc()
{
// @todo: Pretty convoluted way to avoid squashing from happening when using
// the XC during an instruction's execution (specifically for instructions
// that have sideeffects that use the XC). Fix this.
// @todo: Pretty convoluted way to avoid squashing from happening
// when using the XC during an instruction's execution
// (specifically for instructions that have side-effects that use
// the XC). Fix this.
bool in_syscall = this->thread->inSyscall;
this->thread->inSyscall = true;
@ -99,10 +101,12 @@ Fault
AlphaDynInst<Impl>::completeAcc()
{
if (this->isLoad()) {
// Loads need the request's data to complete the access.
this->fault = this->staticInst->completeAcc(this->req->data,
this,
this->traceData);
} else if (this->isStore()) {
// Stores need the result of the request to complete their access.
this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
this,
this->traceData);
@ -118,9 +122,11 @@ template <class Impl>
Fault
AlphaDynInst<Impl>::hwrei()
{
// Can only do a hwrei when in pal mode.
if (!this->cpu->inPalMode(this->readPC()))
return new AlphaISA::UnimplementedOpcodeFault;
// Set the next PC based on the value of the EXC_ADDR IPR.
this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
this->threadNumber));

View file

@ -125,7 +125,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params
Tick fetchTrapLatency;
//
// Branch predictor (BP & BTB)
// Branch predictor (BP, BTB, RAS)
//
std::string predType;
unsigned localPredictorSize;

View file

@ -41,6 +41,7 @@
// typedef yet are not templated on the Impl. For now it will be defined here.
typedef short int PhysRegIndex;
/** Struct that defines the information passed from fetch to decode. */
template<class Impl>
struct DefaultFetchDefaultDecode {
typedef typename Impl::DynInstPtr DynInstPtr;
@ -53,6 +54,7 @@ struct DefaultFetchDefaultDecode {
bool clearFetchFault;
};
/** Struct that defines the information passed from decode to rename. */
template<class Impl>
struct DefaultDecodeDefaultRename {
typedef typename Impl::DynInstPtr DynInstPtr;
@ -62,6 +64,7 @@ struct DefaultDecodeDefaultRename {
DynInstPtr insts[Impl::MaxWidth];
};
/** Struct that defines the information passed from rename to IEW. */
template<class Impl>
struct DefaultRenameDefaultIEW {
typedef typename Impl::DynInstPtr DynInstPtr;
@ -71,6 +74,7 @@ struct DefaultRenameDefaultIEW {
DynInstPtr insts[Impl::MaxWidth];
};
/** Struct that defines the information passed from IEW to commit. */
template<class Impl>
struct DefaultIEWDefaultCommit {
typedef typename Impl::DynInstPtr DynInstPtr;
@ -98,6 +102,7 @@ struct IssueStruct {
DynInstPtr insts[Impl::MaxWidth];
};
/** Struct that defines all backwards communication. */
template<class Impl>
struct TimeBufStruct {
struct decodeComm {
@ -119,13 +124,7 @@ struct TimeBufStruct {
decodeComm decodeInfo[Impl::MaxThreads];
// Rename can't actually tell anything to squash or send a new PC back
// because it doesn't do anything along those lines. But maybe leave
// these fields in here to keep the stages mostly orthagonal.
struct renameComm {
bool squash;
uint64_t nextPC;
};
renameComm renameInfo[Impl::MaxThreads];

View file

@ -84,6 +84,9 @@ class DefaultCommit
typedef O3ThreadState<Impl> Thread;
/** Event class used to schedule a squash due to a trap (fault or
* interrupt) to happen on a specific cycle.
*/
class TrapEvent : public Event {
private:
DefaultCommit<Impl> *commit;
@ -161,7 +164,7 @@ class DefaultCommit
Fetch *fetchStage;
/** Sets the poitner to the IEW stage. */
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
/** The pointer to the IEW stage. Used solely to ensure that
@ -182,10 +185,13 @@ class DefaultCommit
/** Initializes stage by sending back the number of free entries. */
void initStage();
/** Initializes the switching out of commit. */
void switchOut();
/** Completes the switch out of commit. */
void doSwitchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Ticks the commit stage, which tries to commit instructions. */
@ -199,11 +205,18 @@ class DefaultCommit
/** Returns the number of free ROB entries for a specific thread. */
unsigned numROBFreeEntries(unsigned tid);
/** Generates an event to schedule a squash due to a trap. */
void generateTrapEvent(unsigned tid);
/** Records that commit needs to initiate a squash due to an
* external state update through the XC.
*/
void generateXCEvent(unsigned tid);
private:
/** Updates the overall status of commit with the nextStatus, and
* tell the CPU if commit is active/inactive. */
* tell the CPU if commit is active/inactive.
*/
void updateStatus();
/** Sets the next status based on threads' statuses, which becomes the
@ -222,10 +235,13 @@ class DefaultCommit
*/
bool changedROBEntries();
/** Squashes all in flight instructions. */
void squashAll(unsigned tid);
/** Handles squashing due to a trap. */
void squashFromTrap(unsigned tid);
/** Handles squashing due to an XC write. */
void squashFromXC(unsigned tid);
/** Commits as many instructions as possible. */
@ -236,8 +252,6 @@ class DefaultCommit
*/
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
void generateTrapEvent(unsigned tid);
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
@ -259,12 +273,16 @@ class DefaultCommit
*/
uint64_t readPC() { return PC[0]; }
/** Returns the PC of a specific thread. */
uint64_t readPC(unsigned tid) { return PC[tid]; }
/** Sets the PC of a specific thread. */
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
/** Reads the next PC of a specific thread. */
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
/** Sets the next PC of a specific thread. */
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
private:
@ -304,6 +322,7 @@ class DefaultCommit
/** Memory interface. Used for d-cache accesses. */
MemInterface *dcacheInterface;
/** Vector of all of the threads. */
std::vector<Thread *> thread;
Fault fetchFault;
@ -362,17 +381,27 @@ class DefaultCommit
/** Number of Active Threads */
unsigned numThreads;
/** Is a switch out pending. */
bool switchPending;
/** Is commit switched out. */
bool switchedOut;
/** The latency to handle a trap. Used when scheduling trap
* squash event.
*/
Tick trapLatency;
Tick fetchTrapLatency;
Tick fetchFaultTick;
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
Addr PC[Impl::MaxThreads];
/** The next PC of each thread. */
Addr nextPC[Impl::MaxThreads];
/** The sequence number of the youngest valid instruction in the ROB. */
@ -384,6 +413,7 @@ class DefaultCommit
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
/** Updates commit stats based on this instruction. */
void updateComInstStats(DynInstPtr &inst);
/** Stat for the total number of committed instructions. */
@ -417,7 +447,9 @@ class DefaultCommit
/** Total number of committed branches. */
Stats::Vector<> statComBranches;
/** Number of cycles where the commit bandwidth limit is reached. */
Stats::Scalar<> commitEligibleSamples;
/** Number of instructions not committed due to bandwidth limits. */
Stats::Vector<> commitEligible;
};

View file

@ -691,7 +691,7 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
/*
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
// Record the fault. Wait until it's empty in the ROB.
// Then handle the trap. Ignore it if there's already a
@ -713,7 +713,7 @@ DefaultCommit<Impl>::commit()
commitStatus[0] = Running;
}
}
*/
// Not sure which one takes priority. I think if we have
// both, that's a bad sign.
if (trapSquash[tid] == true) {
@ -925,7 +925,7 @@ DefaultCommit<Impl>::commitInsts()
numCommittedDist.sample(num_committed);
if (num_committed == commitWidth) {
commitEligible[0]++;
commitEligibleSamples[0]++;
}
}
@ -947,6 +947,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->reachedCommit = true;
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
head_inst->isMemBarrier() ||
head_inst->isWriteBarrier()) {

View file

@ -67,6 +67,11 @@ class BaseFullCPU : public BaseCPU
int cpu_id;
};
/**
* FullO3CPU class, has each of the stages (fetch through commit)
* within it, as well as all of the time buffers between stages. The
* tick() function for the CPU is defined here.
*/
template <class Impl>
class FullO3CPU : public BaseFullCPU
{
@ -194,17 +199,13 @@ class FullO3CPU : public BaseFullCPU
*/
virtual void syscall(int tid) { panic("Unimplemented!"); }
/** Check if there are any system calls pending. */
void checkSyscalls();
/** Switches out this CPU.
*/
/** Switches out this CPU. */
void switchOut(Sampler *sampler);
/** Signals to this CPU that a stage has completed switching out. */
void signalSwitched();
/** Takes over from another CPU.
*/
/** Takes over from another CPU. */
void takeOverFrom(BaseCPU *oldCPU);
/** Get the current instruction sequence number, and increment it. */
@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU
#endif
//
// New accessors for new decoder.
//
/** Register accessors. Index refers to the physical register index. */
uint64_t readIntReg(int reg_idx);
float readFloatRegSingle(int reg_idx);
@ -271,6 +270,11 @@ class FullO3CPU : public BaseFullCPU
uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
/** Architectural register accessors. Looks up in the commit
* rename table to obtain the true physical index of the
* architected register first, then accesses that physical
* register.
*/
void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
@ -279,13 +283,17 @@ class FullO3CPU : public BaseFullCPU
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
/** Reads the commit PC of a specific thread. */
uint64_t readPC(unsigned tid);
void setPC(Addr new_PC,unsigned tid);
/** Sets the commit PC of a specific thread. */
void setPC(Addr new_PC, unsigned tid);
/** Reads the next PC of a specific thread. */
uint64_t readNextPC(unsigned tid);
void setNextPC(uint64_t val,unsigned tid);
/** Sets the next PC of a specific thread. */
void setNextPC(uint64_t val, unsigned tid);
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
@ -309,21 +317,15 @@ class FullO3CPU : public BaseFullCPU
/** Remove all instructions younger than the given sequence number. */
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
/** Removes the instruction pointed to by the iterator. */
inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
/** Cleans up all instructions on the remove list. */
void cleanUpRemovedInsts();
/** Remove all instructions from the list. */
// void removeAllInsts();
/** Debug function to print all instructions on the list. */
void dumpInsts();
/** Basically a wrapper function so that instructions executed at
* commit can tell the instruction queue that they have
* completed. Eventually this hack should be removed.
*/
// void wakeDependents(DynInstPtr &inst);
public:
/** List of all the instructions in flight. */
std::list<DynInstPtr> instList;
@ -334,6 +336,9 @@ class FullO3CPU : public BaseFullCPU
std::queue<ListIt> removeList;
#ifdef DEBUG
/** Debug structure to keep track of the sequence numbers still in
* flight.
*/
std::set<InstSeqNum> snList;
#endif
@ -420,14 +425,22 @@ class FullO3CPU : public BaseFullCPU
/** The IEW stage's instruction queue. */
TimeBuffer<IEWStruct> iewQueue;
public:
private:
/** The activity recorder; used to tell if the CPU has any
* activity remaining or if it can go to idle and deschedule
* itself.
*/
ActivityRecorder activityRec;
public:
/** Records that there was time buffer activity this cycle. */
void activityThisCycle() { activityRec.activity(); }
/** Changes a stage's status to active within the activity recorder. */
void activateStage(const StageIdx idx)
{ activityRec.activateStage(idx); }
/** Changes a stage's status to inactive within the activity recorder. */
void deactivateStage(const StageIdx idx)
{ activityRec.deactivateStage(idx); }
@ -438,7 +451,7 @@ class FullO3CPU : public BaseFullCPU
int getFreeTid();
public:
/** Temporary function to get pointer to exec context. */
/** Returns a pointer to a thread's exec context. */
ExecContext *xcBase(unsigned tid)
{
return thread[tid]->getXCProxy();
@ -447,6 +460,10 @@ class FullO3CPU : public BaseFullCPU
/** The global sequence number counter. */
InstSeqNum globalSeqNum;
/** Pointer to the checker, which can dynamically verify
* instruction results at run time. This can be set to NULL if it
* is not being used.
*/
Checker<DynInstPtr> *checker;
#if FULL_SYSTEM
@ -462,11 +479,13 @@ class FullO3CPU : public BaseFullCPU
/** Pointer to memory. */
FunctionalMemory *mem;
/** Pointer to the sampler */
Sampler *sampler;
/** Counter of how many stages have completed switching out. */
int switchCount;
// List of all ExecContexts.
/** Pointers to all of the threads in the CPU. */
std::vector<Thread *> thread;
#if 0

View file

@ -48,24 +48,50 @@
#include "cpu/o3/comm.hh"
/**
* Struct that defines the key classes to be used by the CPU. All
* classes use the typedefs defined here to determine what are the
* classes of the other stages and communication buffers. In order to
* change a structure such as the IQ, simply change the typedef here
* to use the desired class instead, and recompile. In order to
* create a different CPU to be used simultaneously with this one, see
* the alpha_impl.hh file for instructions.
*/
template<class Impl>
struct SimpleCPUPolicy
{
/** Typedef for the branch prediction unit (which includes the BP,
* RAS, and BTB).
*/
typedef BPredUnit<Impl> BPredUnit;
/** Typedef for the register file. Most classes assume a unified
* physical register file.
*/
typedef PhysRegFile<Impl> RegFile;
/** Typedef for the freelist of registers. */
typedef SimpleFreeList FreeList;
/** Typedef for the rename map. */
typedef SimpleRenameMap RenameMap;
/** Typedef for the ROB. */
typedef ROB<Impl> ROB;
/** Typedef for the instruction queue/scheduler. */
typedef InstructionQueue<Impl> IQ;
/** Typedef for the memory dependence unit. */
typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
/** Typedef for the LSQ. */
typedef LSQ<Impl> LSQ;
/** Typedef for the thread-specific LSQ units. */
typedef LSQUnit<Impl> LSQUnit;
/** Typedef for fetch. */
typedef DefaultFetch<Impl> Fetch;
/** Typedef for decode. */
typedef DefaultDecode<Impl> Decode;
/** Typedef for rename. */
typedef DefaultRename<Impl> Rename;
/** Typedef for Issue/Execute/Writeback. */
typedef DefaultIEW<Impl> IEW;
/** Typedef for commit. */
typedef DefaultCommit<Impl> Commit;
/** The struct for communication between fetch and decode. */

View file

@ -107,9 +107,12 @@ class DefaultDecode
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Switches out the decode stage. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Ticks decode, processing all input signals and decoding as many
* instructions as possible.
*/

View file

@ -41,6 +41,7 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
{
_status = Inactive;
// Setup status, make sure stall signals are clear.
for (int i = 0; i < numThreads; ++i) {
decodeStatus[i] = Idle;
@ -165,6 +166,7 @@ template <class Impl>
void
DefaultDecode<Impl>::switchOut()
{
// Decode can immediately switch out: the only action taken here is
// signalling the CPU that this stage has completed switching out.
cpu->signalSwitched();
}
@ -174,6 +176,7 @@ DefaultDecode<Impl>::takeOverFrom()
{
_status = Inactive;
// Be sure to reset state and clear out any old instructions.
for (int i = 0; i < numThreads; ++i) {
decodeStatus[i] = Idle;
@ -222,22 +225,22 @@ DefaultDecode<Impl>::block(unsigned tid)
{
DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
// If the decode status is blocked or unblocking then decode has not yet
// signalled fetch to unblock. In that case, there is no need to tell
// fetch to block.
if (decodeStatus[tid] != Blocked &&
decodeStatus[tid] != Unblocking) {
toFetch->decodeBlock[tid] = true;
wroteToTimeBuffer = true;
}
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
skidInsert(tid);
// If the decode status is blocked or unblocking then decode has not yet
// signalled fetch to unblock. In that case, there is no need to tell
// fetch to block.
if (decodeStatus[tid] != Blocked) {
// Set the status to Blocked.
decodeStatus[tid] = Blocked;
if (decodeStatus[tid] != Unblocking) {
toFetch->decodeBlock[tid] = true;
wroteToTimeBuffer = true;
}
return true;
}
@ -270,13 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
"detected at decode.\n", tid);
// Send back mispredict information.
toFetch->decodeInfo[tid].branchMispredict = true;
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
toFetch->decodeInfo[tid].branchTaken = true;
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
// Might have to tell fetch to unblock.
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
toFetch->decodeUnblock[tid] = 1;
@ -292,11 +298,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
}
}
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
// Clear the skid buffer in case it has any data in it.
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}
@ -341,11 +348,12 @@ DefaultDecode<Impl>::squash(unsigned tid)
}
}
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
// Clear the skid buffer in case it has any data in it.
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}

View file

@ -4,6 +4,7 @@
#include "cpu/o3/comm.hh"
/** Node in a linked list. */
template <class DynInstPtr>
class DependencyEntry
{
@ -18,32 +19,50 @@ class DependencyEntry
DependencyEntry<DynInstPtr> *next;
};
/** Array of linked lists that maintains the dependencies between
* producing instructions and consuming instructions. Each linked
* list represents a single physical register, having the future
* producer of the register's value, and all consumers waiting on that
* value on the list. The head node of each linked list represents
* the producing instruction of that register. Instructions are put
* on the list upon reaching the IQ, and are removed from the list
* either when the producer completes, or the instruction is squashed.
*/
template <class DynInstPtr>
class DependencyGraph
{
public:
typedef DependencyEntry<DynInstPtr> DepEntry;
/** Default construction. Must call resize() prior to use. */
DependencyGraph()
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
{ }
/** Resize the dependency graph to have num_entries registers. */
void resize(int num_entries);
/** Clears all of the linked lists. */
void reset();
/** Inserts an instruction to be dependent on the given index. */
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
/** Sets the producing instruction of a given register. */
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
{ dependGraph[idx].inst = new_inst; }
/** Clears the producing instruction. */
void clearInst(PhysRegIndex idx)
{ dependGraph[idx].inst = NULL; }
/** Removes an instruction from a single linked list. */
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
/** Removes and returns the newest dependent of a specific register. */
DynInstPtr pop(PhysRegIndex idx);
/** Checks if there are any dependents on a specific register. */
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
/** Debugging function to dump out the dependency graph.
@ -59,13 +78,16 @@ class DependencyGraph
*/
DepEntry *dependGraph;
/** Number of linked lists; identical to the number of registers. */
int numEntries;
// Debug variable, remove when done testing.
unsigned memAllocCounter;
public:
// Debug variable, remove when done testing.
uint64_t nodesTraversed;
// Debug variable, remove when done testing.
uint64_t nodesRemoved;
};

View file

@ -42,7 +42,7 @@ class Sampler;
* width is specified by the parameters; each cycle it tries to fetch
* that many instructions. It supports using a branch predictor to
* predict direction and targets.
* It supports the idling functionalitiy of the CPU by indicating to
* It supports the idling functionality of the CPU by indicating to
* the CPU when it is active and inactive.
*/
template <class Impl>
@ -163,14 +163,19 @@ class DefaultFetch
/** Processes cache completion event. */
void processCacheCompletion(MemReqPtr &req);
/** Begins the switch out of the fetch stage. */
void switchOut();
/** Completes the switch out of the fetch stage. */
void doSwitchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Checks if the fetch stage is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Tells fetch to wake up from a quiesce instruction. */
void wakeFromQuiesce();
private:
@ -301,8 +306,10 @@ class DefaultFetch
/** BPredUnit. */
BPredUnit branchPred;
/** Per-thread fetch PC. */
Addr PC[Impl::MaxThreads];
/** Per-thread next PC. */
Addr nextPC[Impl::MaxThreads];
/** Memory request used to access cache. */
@ -369,8 +376,12 @@ class DefaultFetch
/** Thread ID being fetched. */
int threadFetched;
/** Records whether there is an interrupt pending. If there is, fetch
* must stop once it is not fetching PAL instructions.
*/
bool interruptPending;
/** Records if fetch is switched out. */
bool switchedOut;
#if !FULL_SYSTEM
@ -394,17 +405,23 @@ class DefaultFetch
* the pipeline.
*/
Stats::Scalar<> fetchIdleCycles;
/** Total number of cycles spent blocked. */
Stats::Scalar<> fetchBlockedCycles;
/** Total number of cycles spent in any other state. */
Stats::Scalar<> fetchMiscStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
/** Total number of outstanding icache accesses that were dropped
* due to a squash.
*/
Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
/** Rate of how often fetch was idle. */
Stats::Formula idleRate;
/** Number of branch fetches per cycle. */
Stats::Formula branchRate;
/** Number of instructions fetched per cycle. */
Stats::Formula fetchRate;
};

View file

@ -161,59 +161,59 @@ void
DefaultFetch<Impl>::regStats()
{
icacheStallCycles
.name(name() + ".FETCH:icacheStallCycles")
.name(name() + ".icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
.name(name() + ".FETCH:Insts")
.name(name() + ".Insts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
.name(name() + ".FETCH:Branches")
.name(name() + ".Branches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
.name(name() + ".FETCH:predictedBranches")
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
.name(name() + ".FETCH:Cycles")
.name(name() + ".Cycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
.name(name() + ".FETCH:SquashCycles")
.name(name() + ".SquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchIdleCycles
.name(name() + ".FETCH:IdleCycles")
.name(name() + ".IdleCycles")
.desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles);
fetchBlockedCycles
.name(name() + ".FETCH:BlockedCycles")
.name(name() + ".BlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
.name(name() + ".FETCH:CacheLines")
.name(name() + ".CacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchMiscStallCycles
.name(name() + ".FETCH:MiscStallCycles")
.name(name() + ".MiscStallCycles")
.desc("Number of cycles fetch has spent waiting on interrupts, or "
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
fetchIcacheSquashes
.name(name() + ".FETCH:IcacheSquashes")
.name(name() + ".IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
@ -221,24 +221,24 @@ DefaultFetch<Impl>::regStats()
.init(/* base value */ 0,
/* last value */ fetchWidth,
/* bucket size */ 1)
.name(name() + ".FETCH:rateDist")
.name(name() + ".rateDist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf);
idleRate
.name(name() + ".FETCH:idleRate")
.name(name() + ".idleRate")
.desc("Percent of cycles fetch was idle")
.prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate
.name(name() + ".FETCH:branchRate")
.name(name() + ".branchRate")
.desc("Number of branch fetches per cycle")
.flags(Stats::total);
branchRate = predictedBranches / cpu->numCycles;
branchRate = fetchedBranches / cpu->numCycles;
fetchRate
.name(name() + ".FETCH:rate")
.name(name() + ".rate")
.desc("Number of inst fetches per cycle")
.flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles;
@ -307,6 +307,7 @@ template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
// Setup PC and nextPC with initial state.
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
@ -323,8 +324,6 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Only change the status if it's still waiting on the icache access
// to return.
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
if (fetchStatus[tid] != IcacheMissStall ||
req != memReq[tid] ||
isSwitchedOut()) {
@ -358,6 +357,7 @@ template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
// Fetch is ready to switch out at any time, so record the stage as
// switched out and signal the CPU right away.
switchedOut = true;
cpu->signalSwitched();
}
@ -366,6 +366,7 @@ template <class Impl>
void
DefaultFetch<Impl>::doSwitchOut()
{
// Branch predictor needs to have its state cleared; forward the
// switch out request to it.
branchPred.switchOut();
}
@ -396,6 +397,7 @@ DefaultFetch<Impl>::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
// Hopefully this is safe
// @todo: Allow other threads to wake from quiesce.
fetchStatus[0] = Running;
}
@ -831,7 +833,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
}
}
if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
if (fetchStatus[tid] != IcacheMissStall && checkStall(tid)) {
DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
fetchStatus[tid] = Blocked;
@ -1199,7 +1201,7 @@ DefaultFetch<Impl>::lsqCount()
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheMissComplete ||
fetchStatus[high_pri] == Idle)
fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();

View file

@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability)
}
}
assert(fu_idx < numFU);
unitBusy[fu_idx] = true;
return fu_idx;

View file

@ -155,7 +155,10 @@ class FUPool : public SimObject
return maxIssueLatencies[capability];
}
/** Switches out functional unit pool. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
};

View file

@ -160,12 +160,16 @@ class DefaultIEW
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr);
/** Starts switch out of IEW stage. */
void switchOut();
/** Completes switch out of IEW stage. */
void doSwitchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Returns if IEW is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Sets page table pointer within LSQ. */
@ -287,6 +291,7 @@ class DefaultIEW
void tick();
private:
/** Updates execution stats based on the instruction. */
void updateExeInstStats(DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
@ -429,6 +434,7 @@ class DefaultIEW
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
/** Is this stage switched out. */
bool switchedOut;
/** Stat for total number of idle cycles. */
@ -470,9 +476,13 @@ class DefaultIEW
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
/** Number of executed software prefetches. */
Stats::Vector<> exeSwp;
/** Number of executed nops. */
Stats::Vector<> exeNop;
/** Number of executed memory references. */
Stats::Vector<> exeRefs;
/** Number of executed branches. */
Stats::Vector<> exeBranches;
// Stats::Vector<> issued_ops;
@ -482,19 +492,30 @@ class DefaultIEW
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
*/
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
/** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
// Stats::Formula issue_op_rate;
// Stats::Formula fu_busy_rate;
/** Number of instructions sent to commit. */
Stats::Vector<> iewInstsToCommit;
/** Number of instructions that writeback. */
Stats::Vector<> writebackCount;
/** Number of instructions that wake consumers. */
Stats::Vector<> producerInst;
/** Number of instructions that wake up from producers. */
Stats::Vector<> consumerInst;
/** Number of instructions that were delayed in writing back due
* to resource contention.
*/
Stats::Vector<> wbPenalized;
/** Number of instructions per cycle written back. */
Stats::Formula wbRate;
/** Average number of woken instructions per writeback. */
Stats::Formula wbFanout;
/** Number of instructions per cycle delayed in writing back. */
Stats::Formula wbPenalizedRate;
};

View file

@ -433,6 +433,7 @@ template <class Impl>
void
DefaultIEW<Impl>::switchOut()
{
// IEW is ready to switch out at any time, so signal the CPU
// immediately. No IEW state is modified here.
cpu->signalSwitched();
}
@ -440,6 +441,7 @@ template <class Impl>
void
DefaultIEW<Impl>::doSwitchOut()
{
// Clear any state.
switchedOut = true;
instQueue.switchOut();
@ -458,6 +460,7 @@ template <class Impl>
void
DefaultIEW<Impl>::takeOverFrom()
{
// Reset all state.
_status = Active;
exeStatus = Running;
wbStatus = Idle;
@ -571,6 +574,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
toCommit->squashedSeqNum[tid] = inst->seqNum;
toCommit->nextPC[tid] = inst->readPC();
// Must include the broadcasted SN in the squash.
toCommit->includeSquashInst[tid] = true;
ldstQueue.setLoadBlockedHandled(tid);
@ -1104,6 +1108,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Store conditionals need to be set as "canCommit()"
// so that commit can process them when they reach the
// head of commit.
// @todo: This is somewhat specific to Alpha.
inst->setCanCommit();
instQueue.insertNonSpec(inst);
add_to_iq = false;
@ -1363,6 +1368,7 @@ DefaultIEW<Impl>::executeInsts()
}
}
// Update and record activity if we processed any instructions.
if (inst_num) {
if (exeStatus == Idle) {
exeStatus = Running;
@ -1413,8 +1419,10 @@ DefaultIEW<Impl>::writebackInsts()
scoreboard->setReg(inst->renamedDestRegIdx(i));
}
producerInst[tid]++;
consumerInst[tid]+= dependents;
if (dependents) {
producerInst[tid]++;
consumerInst[tid]+= dependents;
}
writebackCount[tid]++;
}
}
@ -1485,6 +1493,7 @@ DefaultIEW<Impl>::tick()
DPRINTF(IEW,"Processing [tid:%i]\n",tid);
// Update structures based on instructions committed.
if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
!fromCommit->commitInfo[tid].squash &&
!fromCommit->commitInfo[tid].robSquashing) {

View file

@ -92,6 +92,9 @@ class InstructionQueue
/** Pointer back to the instruction queue. */
InstructionQueue<Impl> *iqPtr;
/** Should the FU be added to the list to be freed upon
* completing this event.
*/
bool freeFU;
public:
@ -116,6 +119,7 @@ class InstructionQueue
/** Registers statistics. */
void regStats();
/** Resets all instruction queue state. */
void resetState();
/** Sets CPU pointer. */
@ -133,10 +137,13 @@ class InstructionQueue
/** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Switches out the instruction queue. */
void switchOut();
/** Takes over execution from another CPU's thread. */
void takeOverFrom();
/** Returns if the IQ is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Number of entries needed for given amount of threads. */
@ -171,6 +178,9 @@ class InstructionQueue
*/
void insertBarrier(DynInstPtr &barr_inst);
/** Returns the oldest scheduled instruction, and removes it from
* the list of instructions waiting to execute.
*/
DynInstPtr getInstToExecute();
/**
@ -274,13 +284,15 @@ class InstructionQueue
/** List of all the instructions in the IQ (some of which may be issued). */
std::list<DynInstPtr> instList[Impl::MaxThreads];
/** List of instructions that are ready to be executed. */
std::list<DynInstPtr> instsToExecute;
/**
* Struct for comparing entries to be added to the priority queue. This
* gives reverse ordering to the instructions in terms of sequence
* numbers: the instructions with smaller sequence numbers (and hence
* are older) will be at the top of the priority queue.
* Struct for comparing entries to be added to the priority queue.
* This gives reverse ordering to the instructions in terms of
* sequence numbers: the instructions with smaller sequence
* numbers (and hence are older) will be at the top of the
* priority queue.
*/
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@ -393,6 +405,7 @@ class InstructionQueue
*/
unsigned commitToIEWDelay;
/** Is the IQ switched out. */
bool switchedOut;
/** The sequence number of the squashed instruction. */
@ -460,19 +473,28 @@ class InstructionQueue
*/
Stats::Scalar<> iqSquashedNonSpecRemoved;
/** Distribution of number of instructions in the queue. */
Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction. */
Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
*/
Stats::Vector<> statFuBusy;
// Stats::Vector<> dist_unissued;
/** Stat for total number issued for each instruction type. */
Stats::Vector2d<> statIssuedInstType;
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
// Stats::Formula issue_stores;
// Stats::Formula issue_op_rate;
Stats::Vector<> fuBusy; //cumulative fu busy
/** Number of times the FU was busy. */
Stats::Vector<> fuBusy;
/** Number of times the FU was busy per instruction issued. */
Stats::Formula fuBusyRate;
};

View file

@ -151,8 +151,10 @@ template <class Impl>
InstructionQueue<Impl>::~InstructionQueue()
{
// Reset the dependency graph before the queue goes away.
dependGraph.reset();
#ifdef DEBUG
// Debug builds only: print the dependency graph's node traversal and
// removal counters.
cprintf("Nodes traversed: %i, removed: %i\n",
dependGraph.nodesTraversed, dependGraph.nodesRemoved);
#endif
}
template <class Impl>
@ -669,14 +671,8 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
// @todo: Ensure that these FU Completions happen at the beginning
// of a cycle, otherwise they could add too many instructions to
// the queue.
// @todo: This could break if there's multiple multi-cycle ops
// finishing on this cycle. Maybe implement something like
// instToCommit in iew_impl.hh.
issueToExecuteQueue->access(0)->size++;
instsToExecute.push_back(inst);
// int &size = issueToExecuteQueue->access(0)->size;
// issueToExecuteQueue->access(0)->insts[size++] = inst;
}
// @todo: Figure out a better way to remove the squashed items from the
@ -742,9 +738,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
}
}
// If we have an instruction that doesn't require a FU, or a
// valid FU, then schedule for execution.
if (idx == -2 || idx != -1) {
if (op_latency == 1) {
// i2e_info->insts[exec_queue_slot++] = issuing_inst;
i2e_info->size++;
instsToExecute.push_back(issuing_inst);
@ -762,14 +759,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// @todo: Enforce that issue_latency == 1 or op_latency
if (issue_latency > 1) {
// If FU isn't pipelined, then it must be freed
// upon the execution completing.
execution->setFreeFU();
} else {
// @todo: Not sure I'm accounting for the
// multi-cycle op in a pipelined FU properly, or
// the number of instructions issued in one cycle.
// i2e_info->insts[exec_queue_slot++] = issuing_inst;
// i2e_info->size++;
// Add the FU onto the list of FU's to be freed next cycle.
fuPool->freeUnitNextCycle(idx);
}
@ -814,6 +807,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
numIssuedDist.sample(total_issued);
iqInstsIssued+= total_issued;
// If we issued any instructions, tell the CPU we had activity.
if (total_issued) {
cpu->activityThisCycle();
} else {
@ -1364,4 +1358,45 @@ InstructionQueue<Impl>::dumpInsts()
++num;
}
}
cprintf("Insts to Execute list:\n");
int num = 0;
int valid_num = 0;
ListIt inst_list_it = instsToExecute.begin();
while (inst_list_it != instsToExecute.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed
// still count towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it++;
++num;
}
}

View file

@ -49,6 +49,7 @@ class LSQ {
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
/** SMT policy. */
enum LSQPolicy {
Dynamic,
Partitioned,
@ -69,8 +70,9 @@ class LSQ {
void setIEW(IEW *iew_ptr);
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
/** Switches out the LSQ. */
void switchOut();
/** Takes over execution from another CPU's thread. */
void takeOverFrom();
/** Number of entries needed for the given amount of threads.*/
@ -95,9 +97,6 @@ class LSQ {
/** Executes a load. */
Fault executeLoad(DynInstPtr &inst);
Fault executeLoad(int lq_idx, unsigned tid)
{ return thread[tid].executeLoad(lq_idx); }
/** Executes a store. */
Fault executeStore(DynInstPtr &inst);

View file

@ -112,10 +112,13 @@ class LSQUnit {
/** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr);
/** Switches out LSQ unit. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Returns if the LSQ is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Ticks the LSQ unit, which in this case only resets the number of
@ -180,12 +183,15 @@ class LSQUnit {
bool loadBlocked()
{ return isLoadBlocked; }
/** Clears the signal that a load became blocked. */
void clearLoadBlocked()
{ isLoadBlocked = false; }
/** Returns if the blocked load was handled. */
bool isLoadBlockedHandled()
{ return loadBlockedHandled; }
/** Records the blocked load as being handled. */
void setLoadBlockedHandled()
{ loadBlockedHandled = true; }
@ -331,6 +337,7 @@ class LSQUnit {
/** The number of used cache ports in this cycle. */
int usedPorts;
/** Is the LSQ switched out. */
bool switchedOut;
//list<InstSeqNum> mshrSeqNums;
@ -350,8 +357,10 @@ class LSQUnit {
/** Whether or not a load is blocked due to the memory system. */
bool isLoadBlocked;
/** Has the blocked load been handled. */
bool loadBlockedHandled;
/** The sequence number of the blocked load. */
InstSeqNum blockedLoadSeqNum;
/** The oldest load that caused a memory ordering violation. */
@ -452,10 +461,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
cpu->lockFlag = true;
}
#endif
req->cmd = Read;
assert(!req->completionEvent);
req->completionEvent = NULL;
req->time = curTick;
req->cmd = Read;
assert(!req->completionEvent);
req->completionEvent = NULL;
req->time = curTick;
while (store_idx != -1) {
// End once we've reached the top of the LSQ

View file

@ -477,7 +477,6 @@ LSQUnit<Impl>::commitLoad()
DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
loadQueue[loadHead]->readPC());
loadQueue[loadHead] = NULL;
incrLdIdx(loadHead);

View file

@ -84,8 +84,10 @@ class MemDepUnit {
/** Registers statistics. */
void regStats();
/** Switches out the memory dependence predictor. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Sets the pointer to the IQ. */
@ -155,10 +157,12 @@ class MemDepUnit {
: inst(new_inst), regsReady(false), memDepReady(false),
completed(false), squashed(false)
{
#ifdef DEBUG
++memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry created. "
"memdep_count=%i\n", memdep_count);
#endif
}
/** Frees any pointers. */
@ -167,11 +171,12 @@ class MemDepUnit {
for (int i = 0; i < dependInsts.size(); ++i) {
dependInsts[i] = NULL;
}
#ifdef DEBUG
--memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
"memdep_count=%i\n", memdep_count);
#endif
}
/** Returns the name of the memory dependence entry. */
@ -196,9 +201,11 @@ class MemDepUnit {
bool squashed;
/** For debugging. */
#ifdef DEBUG
static int memdep_count;
static int memdep_insert;
static int memdep_erase;
#endif
};
/** Finds the memory dependence entry in the hash map. */
@ -227,9 +234,13 @@ class MemDepUnit {
*/
MemDepPred depPred;
/** Is there an outstanding load barrier that loads must wait on. */
bool loadBarrier;
/** The sequence number of the load barrier. */
InstSeqNum loadBarrierSN;
/** Is there an outstanding store barrier that stores must wait on. */
bool storeBarrier;
/** The sequence number of the store barrier. */
InstSeqNum storeBarrierSN;
/** Pointer to the IQ. */

View file

@ -105,6 +105,7 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
// Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();
}
@ -116,6 +117,7 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
{
// Be sure to reset all state.
loadBarrier = storeBarrier = false;
loadBarrierSN = storeBarrierSN = 0;
depPred.clear();
@ -146,7 +148,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
inst_entry->listIt = --(instList[tid].end());
// Check any barriers and the dependence predictor for any
// producing stores.
// producing memrefs/stores.
InstSeqNum producing_store;
if (inst->isLoad() && loadBarrier) {
producing_store = loadBarrierSN;
@ -253,6 +255,7 @@ void
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
{
InstSeqNum barr_sn = barr_inst->seqNum;
// Memory barriers block loads and stores, write barriers only stores.
if (barr_inst->isMemBarrier()) {
loadBarrier = true;
loadBarrierSN = barr_sn;
@ -330,6 +333,7 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
DynInstPtr temp_inst;
bool found_inst = false;
// For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
temp_inst = instsToReplay.front();

View file

@ -155,10 +155,13 @@ class DefaultRename
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard);
/** Switches out the rename stage. */
void switchOut();
/** Completes the switch out. */
void doSwitchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Squashes all instructions in a thread. */
@ -243,8 +246,10 @@ class DefaultRename
/** Checks if any stages are telling rename to block. */
bool checkStall(unsigned tid);
/** Gets the number of free entries for a specific thread. */
void readFreeEntries(unsigned tid);
/** Checks the signals and updates the status. */
bool checkSignalsAndUpdate(unsigned tid);
/** Either serializes on the next instruction available in the InstQueue,
@ -454,8 +459,11 @@ class DefaultRename
Stats::Scalar<> renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
Stats::Scalar<> renameUndoneMaps;
/** Number of serialize instructions handled. */
Stats::Scalar<> renamedSerializing;
/** Number of instructions marked as temporarily serializing. */
Stats::Scalar<> renamedTempSerializing;
/** Number of instructions inserted into skid buffers. */
Stats::Scalar<> renameSkidInsts;
};

View file

@ -258,6 +258,7 @@ template <class Impl>
void
DefaultRename<Impl>::switchOut()
{
// Rename is ready to switch out at any time, so nothing is deferred
// here: the CPU is signalled right away via signalSwitched().
cpu->signalSwitched();
}
@ -265,6 +266,7 @@ template <class Impl>
void
DefaultRename<Impl>::doSwitchOut()
{
// Clear any state, fix up the rename map.
for (int i = 0; i < numThreads; i++) {
typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();

View file

@ -62,12 +62,13 @@ class SimpleRenameMap
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
public:
//Constructor
SimpleRenameMap() {};
/** Default constructor. init() must be called prior to use. */
SimpleRenameMap() {};
/** Destructor. */
~SimpleRenameMap();
/** Initializes rename map with given parameters. */
void init(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
PhysRegIndex &_int_reg_start,
@ -84,6 +85,7 @@ class SimpleRenameMap
int id,
bool bindRegs);
/** Sets the free list used with this rename map. */
void setFreeList(SimpleFreeList *fl_ptr);
//Tell rename map to get a free physical register for a given
@ -149,7 +151,6 @@ class SimpleRenameMap
{ }
};
//Change this to private
private:
/** Integer rename map. */
std::vector<RenameEntry> intRenameMap;

View file

@ -95,8 +95,10 @@ class ROB
*/
void setActiveThreads(std::list<unsigned>* at_ptr);
/** Switches out the ROB. */
void switchOut();
/** Takes over another CPU's thread. */
void takeOverFrom();
/** Function to insert an instruction into the ROB. Note that whatever
@ -298,6 +300,7 @@ class ROB
/** Number of instructions in the ROB. */
int numInstsInROB;
/** Dummy instruction returned if there are no insts left. */
DynInstPtr dummyInst;
private:

View file

@ -26,6 +26,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "base/intmath.hh"
#include "base/trace.hh"
#include "cpu/o3/store_set.hh"
@ -36,6 +37,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
SSITSize, LFSTSize);
if (!isPowerOf2(SSITSize)) {
fatal("Invalid SSIT size!\n");
}
SSIT.resize(SSITSize);
validSSIT.resize(SSITSize);
@ -43,6 +48,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
for (int i = 0; i < SSITSize; ++i)
validSSIT[i] = false;
if (!isPowerOf2(LFSTSize)) {
fatal("Invalid LFST size!\n");
}
LFST.resize(LFSTSize);
validLFST.resize(LFSTSize);
@ -318,3 +327,19 @@ StoreSet::clear()
storeList.clear();
}
void
StoreSet::dump()
{
cprintf("storeList.size(): %i\n", storeList.size());
SeqNumMapIt store_list_it = storeList.begin();
int num = 0;
while (store_list_it != storeList.end()) {
cprintf("%i: [sn:%lli] SSID:%i\n",
num, (*store_list_it).first, (*store_list_it).second);
num++;
store_list_it++;
}
}

View file

@ -44,58 +44,98 @@ struct ltseqnum {
}
};
/**
* Implements a store set predictor for determining if memory
* instructions are dependent upon each other. See paper "Memory
* Dependence Prediction using Store Sets" by Chrysos and Emer. SSID
* stands for Store Set ID, SSIT stands for Store Set ID Table, and
* LFST is Last Fetched Store Table.
*/
class StoreSet
{
public:
typedef unsigned SSID;
public:
/** Default constructor. init() must be called prior to use. */
StoreSet() { };
/** Creates store set predictor with given table sizes. */
StoreSet(int SSIT_size, int LFST_size);
/** Default destructor. */
~StoreSet();
/** Initializes the store set predictor with the given table sizes. */
void init(int SSIT_size, int LFST_size);
/** Records a memory ordering violation between the younger load
* and the older store. */
void violation(Addr store_PC, Addr load_PC);
/** Inserts a load into the store set predictor. This does nothing but
* is included in case other predictors require a similar function.
*/
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
/** Inserts a store into the store set predictor. Updates the
* LFST if the store has a valid SSID. */
void insertStore(Addr store_PC, InstSeqNum store_seq_num,
unsigned tid);
/** Checks if the instruction with the given PC is dependent upon
* any store. @return Returns the sequence number of the store
* instruction this PC is dependent upon. Returns 0 if none.
*/
InstSeqNum checkInst(Addr PC);
/** Records this PC/sequence number as issued. */
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
/** Squashes for a specific thread until the given sequence number. */
void squash(InstSeqNum squashed_num, unsigned tid);
/** Resets all tables. */
void clear();
/** Debug function to dump the contents of the store list. */
void dump();
private:
/** Maps a PC to its SSIT index: the PC is shifted right by offsetBits
 * and the result is masked with indexMask.
 */
inline int calcIndex(Addr PC)
{
    const Addr shifted_pc = PC >> offsetBits;
    return shifted_pc & indexMask;
}
/** Derives a Store Set ID from the PC: the PC is XORed with itself
 * shifted right by 10 bits, then reduced modulo LFSTSize.
 */
inline SSID calcSSID(Addr PC)
{
    const Addr folded_pc = PC ^ (PC >> 10);
    return folded_pc % LFSTSize;
}
/** The Store Set ID Table. */
std::vector<SSID> SSIT;
/** Bit vector to tell if the SSIT has a valid entry. */
std::vector<bool> validSSIT;
/** Last Fetched Store Table. */
std::vector<InstSeqNum> LFST;
/** Bit vector to tell if the LFST has a valid entry. */
std::vector<bool> validLFST;
/** Map of stores that have been inserted into the store set, but
* not yet issued or squashed.
*/
std::map<InstSeqNum, int, ltseqnum> storeList;
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
/** Store Set ID Table size, in entries. */
int SSITSize;
/** Last Fetched Store Table size, in entries. */
int LFSTSize;
/** Mask to obtain the index. */
int indexMask;
// HACK: Hardcoded for now.

View file

@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState {
typedef ExecContext::Status Status;
typedef typename Impl::FullCPU FullCPU;
/** Current status of the thread. */
Status _status;
// Current instruction
/** Current instruction the thread is committing. Only set and
* used for DTB faults currently.
*/
TheISA::MachInst inst;
private:
/** Pointer to the CPU. */
FullCPU *cpu;
public:
/** Whether or not the thread is currently in syscall mode, and
* thus able to be externally updated without squashing.
*/
bool inSyscall;
/** Whether or not the thread is currently waiting on a trap, and
* thus able to be externally updated without squashing.
*/
bool trapPending;
#if FULL_SYSTEM
@ -88,31 +98,44 @@ struct O3ThreadState : public ThreadState {
{ }
#endif
/** Pointer to the ExecContext of this thread. @todo: Don't call
this a proxy.*/
ExecContext *xcProxy;
/** Returns a pointer to the XC of this thread. */
ExecContext *getXCProxy() { return xcProxy; }
/** Returns the status of this thread. */
Status status() const { return _status; }
/** Sets the status of this thread. */
void setStatus(Status new_status) { _status = new_status; }
#if !FULL_SYSTEM
/** Returns if this address is a valid instruction address. */
bool validInstAddr(Addr addr)
{ return process->validInstAddr(addr); }
/** Returns if this address is a valid data address. */
bool validDataAddr(Addr addr)
{ return process->validDataAddr(addr); }
#endif
/** Reports whether the thread is misspeculating; this implementation
 * always returns false.
 */
bool misspeculating() { return false; }
/** Sets the current instruction being committed. */
void setInst(TheISA::MachInst _inst) { inst = _inst; }
/** Reads the number of instructions functionally executed and
* committed.
*/
Counter readFuncExeInst() { return funcExeInst; }
/** Sets the total number of instructions functionally executed
* and committed.
*/
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
#if !FULL_SYSTEM
/** Handles the syscall. */
void syscall() { process->syscall(xcProxy); }
#endif
};