Updates to bring CPU portion of m5 up-to-date with newmem.

--HG--
extra : convert_revision : 00e6eefb24e6ffd9c7c5d8165db26fbf6199fdc4
This commit is contained in:
Kevin Lim 2006-08-02 12:05:34 -04:00
parent 8d220c5c10
commit cbfbb7bc56
37 changed files with 691 additions and 580 deletions

View file

@ -212,6 +212,11 @@ class TimeBuffer
{
return wire(this, 0);
}
int getSize()
{
return size;
}
};
#endif // __BASE_TIMEBUF_HH__

View file

@ -100,32 +100,15 @@ BaseDynInst<Impl>::initVars()
readyRegs = 0;
completed = false;
resultReady = false;
canIssue = false;
issued = false;
executed = false;
canCommit = false;
committed = false;
squashed = false;
squashedInIQ = false;
squashedInLSQ = false;
squashedInROB = false;
instResult.integer = 0;
status.reset();
eaCalcDone = false;
memOpDone = false;
lqIdx = -1;
sqIdx = -1;
reachedCommit = false;
blockingInst = false;
recoverInst = false;
iqEntry = false;
robEntry = false;
serializeBefore = false;
serializeAfter = false;
serializeHandled = false;
// Eventually make this a parameter.
threadNumber = 0;
@ -395,7 +378,7 @@ void
BaseDynInst<Impl>::markSrcRegReady()
{
if (++readyRegs == numSrcRegs()) {
canIssue = true;
status.set(CanIssue);
}
}
@ -403,13 +386,9 @@ template <class Impl>
void
BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx)
{
++readyRegs;
_readySrcRegIdx[src_idx] = true;
if (readyRegs == numSrcRegs()) {
canIssue = true;
}
markSrcRegReady();
}
template <class Impl>

View file

@ -127,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The sequence number of the instruction. */
InstSeqNum seqNum;
/** Is the instruction in the IQ */
bool iqEntry;
enum Status {
IqEntry, /// Instruction is in the IQ
RobEntry, /// Instruction is in the ROB
LsqEntry, /// Instruction is in the LSQ
Completed, /// Instruction has completed
ResultReady, /// Instruction has its result
CanIssue, /// Instruction can issue and execute
Issued, /// Instruction has issued
Executed, /// Instruction has executed
CanCommit, /// Instruction can commit
AtCommit, /// Instruction has reached commit
Committed, /// Instruction has committed
Squashed, /// Instruction is squashed
SquashedInIQ, /// Instruction is squashed in the IQ
SquashedInLSQ, /// Instruction is squashed in the LSQ
SquashedInROB, /// Instruction is squashed in the ROB
RecoverInst, /// Is a recover instruction
BlockingInst, /// Is a blocking instruction
ThreadsyncWait, /// Is a thread synchronization instruction
SerializeBefore, /// Needs to serialize on
/// instructions ahead of it
SerializeAfter, /// Needs to serialize instructions behind it
SerializeHandled, /// Serialization has been handled
NumStatus
};
/** Is the instruction in the ROB */
bool robEntry;
/** Is the instruction in the LSQ */
bool lsqEntry;
/** Is the instruction completed. */
bool completed;
/** Is the instruction's result ready. */
bool resultReady;
/** Can this instruction issue. */
bool canIssue;
/** Has this instruction issued. */
bool issued;
/** Has this instruction executed (or made it through execute) yet. */
bool executed;
/** Can this instruction commit. */
bool canCommit;
/** Is this instruction committed. */
bool committed;
/** Is this instruction squashed. */
bool squashed;
/** Is this instruction squashed in the instruction queue. */
bool squashedInIQ;
/** Is this instruction squashed in the instruction queue. */
bool squashedInLSQ;
/** Is this instruction squashed in the instruction queue. */
bool squashedInROB;
/** Is this a recover instruction. */
bool recoverInst;
/** Is this a thread blocking instruction. */
bool blockingInst; /* this inst has called thread_block() */
/** Is this a thread syncrhonization instruction. */
bool threadsyncWait;
/** The status of this BaseDynInst. Several bits can be set. */
std::bitset<NumStatus> status;
/** The thread this instruction is from. */
short threadNumber;
@ -351,9 +329,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool isThreadSync() const { return staticInst->isThreadSync(); }
bool isSerializing() const { return staticInst->isSerializing(); }
bool isSerializeBefore() const
{ return staticInst->isSerializeBefore() || serializeBefore; }
{ return staticInst->isSerializeBefore() || status[SerializeBefore]; }
bool isSerializeAfter() const
{ return staticInst->isSerializeAfter() || serializeAfter; }
{ return staticInst->isSerializeAfter() || status[SerializeAfter]; }
bool isMemBarrier() const { return staticInst->isMemBarrier(); }
bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
@ -362,41 +340,32 @@ class BaseDynInst : public FastAlloc, public RefCounted
bool isUnverifiable() const { return staticInst->isUnverifiable(); }
/** Temporarily sets this instruction as a serialize before instruction. */
void setSerializeBefore() { serializeBefore = true; }
void setSerializeBefore() { status.set(SerializeBefore); }
/** Clears the serializeBefore part of this instruction. */
void clearSerializeBefore() { serializeBefore = false; }
void clearSerializeBefore() { status.reset(SerializeBefore); }
/** Checks if this serializeBefore is only temporarily set. */
bool isTempSerializeBefore() { return serializeBefore; }
/** Tracks if instruction has been externally set as serializeBefore. */
bool serializeBefore;
bool isTempSerializeBefore() { return status[SerializeBefore]; }
/** Temporarily sets this instruction as a serialize after instruction. */
void setSerializeAfter() { serializeAfter = true; }
void setSerializeAfter() { status.set(SerializeAfter); }
/** Clears the serializeAfter part of this instruction.*/
void clearSerializeAfter() { serializeAfter = false; }
void clearSerializeAfter() { status.reset(SerializeAfter); }
/** Checks if this serializeAfter is only temporarily set. */
bool isTempSerializeAfter() { return serializeAfter; }
bool isTempSerializeAfter() { return status[SerializeAfter]; }
/** Tracks if instruction has been externally set as serializeAfter. */
bool serializeAfter;
/** Sets the serialization part of this instruction as handled. */
void setSerializeHandled() { status.set(SerializeHandled); }
/** Checks if the serialization part of this instruction has been
* handled. This does not apply to the temporary serializing
* state; it only applies to this instruction's own permanent
* serializing state.
*/
bool isSerializeHandled() { return serializeHandled; }
/** Sets the serialization part of this instruction as handled. */
void setSerializeHandled() { serializeHandled = true; }
/** Whether or not the serialization of this instruction has been handled. */
bool serializeHandled;
bool isSerializeHandled() { return status[SerializeHandled]; }
/** Returns the opclass of this instruction. */
OpClass opClass() const { return staticInst->opClass(); }
@ -463,106 +432,112 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
/** Sets this instruction as completed. */
void setCompleted() { completed = true; }
void setCompleted() { status.set(Completed); }
/** Returns whether or not this instruction is completed. */
bool isCompleted() const { return completed; }
bool isCompleted() const { return status[Completed]; }
void setResultReady() { resultReady = true; }
/** Marks the result as ready. */
void setResultReady() { status.set(ResultReady); }
bool isResultReady() const { return resultReady; }
/** Returns whether or not the result is ready. */
bool isResultReady() const { return status[ResultReady]; }
/** Sets this instruction as ready to issue. */
void setCanIssue() { canIssue = true; }
void setCanIssue() { status.set(CanIssue); }
/** Returns whether or not this instruction is ready to issue. */
bool readyToIssue() const { return canIssue; }
bool readyToIssue() const { return status[CanIssue]; }
/** Sets this instruction as issued from the IQ. */
void setIssued() { issued = true; }
void setIssued() { status.set(Issued); }
/** Returns whether or not this instruction has issued. */
bool isIssued() const { return issued; }
bool isIssued() const { return status[Issued]; }
/** Sets this instruction as executed. */
void setExecuted() { executed = true; }
void setExecuted() { status.set(Executed); }
/** Returns whether or not this instruction has executed. */
bool isExecuted() const { return executed; }
bool isExecuted() const { return status[Executed]; }
/** Sets this instruction as ready to commit. */
void setCanCommit() { canCommit = true; }
void setCanCommit() { status.set(CanCommit); }
/** Clears this instruction as being ready to commit. */
void clearCanCommit() { canCommit = false; }
void clearCanCommit() { status.reset(CanCommit); }
/** Returns whether or not this instruction is ready to commit. */
bool readyToCommit() const { return canCommit; }
bool readyToCommit() const { return status[CanCommit]; }
void setAtCommit() { status.set(AtCommit); }
bool isAtCommit() { return status[AtCommit]; }
/** Sets this instruction as committed. */
void setCommitted() { committed = true; }
void setCommitted() { status.set(Committed); }
/** Returns whether or not this instruction is committed. */
bool isCommitted() const { return committed; }
bool isCommitted() const { return status[Committed]; }
/** Sets this instruction as squashed. */
void setSquashed() { squashed = true; }
void setSquashed() { status.set(Squashed); }
/** Returns whether or not this instruction is squashed. */
bool isSquashed() const { return squashed; }
bool isSquashed() const { return status[Squashed]; }
//Instruction Queue Entry
//-----------------------
/** Sets this instruction as a entry the IQ. */
void setInIQ() { iqEntry = true; }
void setInIQ() { status.set(IqEntry); }
/** Sets this instruction as a entry the IQ. */
void removeInIQ() { iqEntry = false; }
/** Sets this instruction as squashed in the IQ. */
void setSquashedInIQ() { squashedInIQ = true; squashed = true;}
/** Returns whether or not this instruction is squashed in the IQ. */
bool isSquashedInIQ() const { return squashedInIQ; }
void clearInIQ() { status.reset(IqEntry); }
/** Returns whether or not this instruction has issued. */
bool isInIQ() const { return iqEntry; }
bool isInIQ() const { return status[IqEntry]; }
/** Sets this instruction as squashed in the IQ. */
void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);}
/** Returns whether or not this instruction is squashed in the IQ. */
bool isSquashedInIQ() const { return status[SquashedInIQ]; }
//Load / Store Queue Functions
//-----------------------
/** Sets this instruction as a entry the LSQ. */
void setInLSQ() { lsqEntry = true; }
void setInLSQ() { status.set(LsqEntry); }
/** Sets this instruction as a entry the LSQ. */
void removeInLSQ() { lsqEntry = false; }
/** Sets this instruction as squashed in the LSQ. */
void setSquashedInLSQ() { squashedInLSQ = true;}
/** Returns whether or not this instruction is squashed in the LSQ. */
bool isSquashedInLSQ() const { return squashedInLSQ; }
void removeInLSQ() { status.reset(LsqEntry); }
/** Returns whether or not this instruction is in the LSQ. */
bool isInLSQ() const { return lsqEntry; }
bool isInLSQ() const { return status[LsqEntry]; }
/** Sets this instruction as squashed in the LSQ. */
void setSquashedInLSQ() { status.set(SquashedInLSQ);}
/** Returns whether or not this instruction is squashed in the LSQ. */
bool isSquashedInLSQ() const { return status[SquashedInLSQ]; }
//Reorder Buffer Functions
//-----------------------
/** Sets this instruction as a entry the ROB. */
void setInROB() { robEntry = true; }
void setInROB() { status.set(RobEntry); }
/** Sets this instruction as a entry the ROB. */
void removeInROB() { robEntry = false; }
/** Sets this instruction as squashed in the ROB. */
void setSquashedInROB() { squashedInROB = true; }
/** Returns whether or not this instruction is squashed in the ROB. */
bool isSquashedInROB() const { return squashedInROB; }
void clearInROB() { status.reset(RobEntry); }
/** Returns whether or not this instruction is in the ROB. */
bool isInROB() const { return robEntry; }
bool isInROB() const { return status[RobEntry]; }
/** Sets this instruction as squashed in the ROB. */
void setSquashedInROB() { status.set(SquashedInROB); }
/** Returns whether or not this instruction is squashed in the ROB. */
bool isSquashedInROB() const { return status[SquashedInROB]; }
/** Read the PC of this instruction. */
const Addr readPC() const { return PC; }
@ -619,8 +594,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Store queue index. */
int16_t sqIdx;
bool reachedCommit;
/** Iterator pointing to this BaseDynInst in the list of all insts. */
ListIt instListIt;
@ -636,7 +609,7 @@ template<class T>
inline Fault
BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
{
if (executed) {
if (status[Executed]) {
fault = cpu->read(req, data, lqIdx);
return fault;
}

View file

@ -94,12 +94,10 @@ Param<unsigned> renameWidth;
Param<unsigned> commitToIEWDelay;
Param<unsigned> renameToIEWDelay;
Param<unsigned> issueToExecuteDelay;
Param<unsigned> dispatchWidth;
Param<unsigned> issueWidth;
Param<unsigned> executeWidth;
Param<unsigned> executeIntWidth;
Param<unsigned> executeFloatWidth;
Param<unsigned> executeBranchWidth;
Param<unsigned> executeMemoryWidth;
Param<unsigned> wbWidth;
Param<unsigned> wbDepth;
SimObjectParam<FUPool *> fuPool;
Param<unsigned> iewToCommitDelay;
@ -109,6 +107,9 @@ Param<unsigned> squashWidth;
Param<Tick> trapLatency;
Param<Tick> fetchTrapLatency;
Param<unsigned> backComSize;
Param<unsigned> forwardComSize;
Param<std::string> predType;
Param<unsigned> localPredictorSize;
Param<unsigned> localCtrBits;
@ -219,12 +220,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
"Issue/Execute/Writeback delay"),
INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
"to the IEW stage)"),
INIT_PARAM(dispatchWidth, "Dispatch width"),
INIT_PARAM(issueWidth, "Issue width"),
INIT_PARAM(executeWidth, "Execute width"),
INIT_PARAM(executeIntWidth, "Integer execute width"),
INIT_PARAM(executeFloatWidth, "Floating point execute width"),
INIT_PARAM(executeBranchWidth, "Branch execute width"),
INIT_PARAM(executeMemoryWidth, "Memory execute width"),
INIT_PARAM(wbWidth, "Writeback width"),
INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
@ -235,6 +234,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
INIT_PARAM(localCtrBits, "Bits per counter"),
@ -353,12 +355,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->commitToIEWDelay = commitToIEWDelay;
params->renameToIEWDelay = renameToIEWDelay;
params->issueToExecuteDelay = issueToExecuteDelay;
params->dispatchWidth = dispatchWidth;
params->issueWidth = issueWidth;
params->executeWidth = executeWidth;
params->executeIntWidth = executeIntWidth;
params->executeFloatWidth = executeFloatWidth;
params->executeBranchWidth = executeBranchWidth;
params->executeMemoryWidth = executeMemoryWidth;
params->wbWidth = wbWidth;
params->wbDepth = wbDepth;
params->fuPool = fuPool;
params->iewToCommitDelay = iewToCommitDelay;
@ -368,6 +368,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->trapLatency = trapLatency;
params->fetchTrapLatency = fetchTrapLatency;
params->backComSize = backComSize;
params->forwardComSize = forwardComSize;
params->predType = predType;
params->localPredictorSize = localPredictorSize;
params->localCtrBits = localCtrBits;

View file

@ -383,7 +383,7 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
}
// Copy the misc regs.
cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
TheISA::copyMiscRegs(xc, this);
// Then finally set the PC and the next PC.
cpu->setPC(xc->readPC(), tid);

View file

@ -106,12 +106,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params
unsigned commitToIEWDelay;
unsigned renameToIEWDelay;
unsigned issueToExecuteDelay;
unsigned dispatchWidth;
unsigned issueWidth;
unsigned executeWidth;
unsigned executeIntWidth;
unsigned executeFloatWidth;
unsigned executeBranchWidth;
unsigned executeMemoryWidth;
unsigned wbWidth;
unsigned wbDepth;
FUPool *fuPool;
//
@ -124,6 +122,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params
Tick trapLatency;
Tick fetchTrapLatency;
//
// Timebuffer sizes
//
unsigned backComSize;
unsigned forwardComSize;
//
// Branch predictor (BP, BTB, RAS)
//

View file

@ -160,10 +160,6 @@ class DefaultCommit
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
void setFetchStage(Fetch *fetch_stage);
Fetch *fetchStage;
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
@ -367,11 +363,6 @@ class DefaultCommit
*/
unsigned renameWidth;
/** IEW width, in instructions. Used so ROB knows how many
* instructions to get from the IEW instruction queue.
*/
unsigned iewWidth;
/** Commit width, in instructions. */
unsigned commitWidth;
@ -392,10 +383,6 @@ class DefaultCommit
*/
Tick trapLatency;
Tick fetchTrapLatency;
Tick fetchFaultTick;
/** The commit PC of each thread. Refers to the instruction that
* is currently being processed/committed.
*/

View file

@ -71,12 +71,10 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameToROBDelay(params->renameToROBDelay),
fetchToCommitDelay(params->commitToFetchDelay),
renameWidth(params->renameWidth),
iewWidth(params->executeWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads),
switchedOut(false),
trapLatency(params->trapLatency),
fetchTrapLatency(params->fetchTrapLatency)
trapLatency(params->trapLatency)
{
_status = Active;
_nextStatus = Inactive;
@ -114,10 +112,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
changedROBNumEntries[i] = false;
trapSquash[i] = false;
xcSquash[i] = false;
PC[i] = nextPC[i] = 0;
}
fetchFaultTick = 0;
fetchTrapWait = 0;
}
template <class Impl>
@ -240,7 +236,6 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
cpu->activateStage(FullCPU::CommitIdx);
trapLatency = cpu->cycles(trapLatency);
fetchTrapLatency = cpu->cycles(fetchTrapLatency);
}
template <class Impl>
@ -297,13 +292,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
fromIEW = iewQueue->getWire(-iewToCommitDelay);
}
template <class Impl>
void
DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
{
fetchStage = fetch_stage;
}
template <class Impl>
void
DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
@ -431,7 +419,7 @@ DefaultCommit<Impl>::setNextStatus()
}
}
assert(squashes == squashCounter);
squashCounter = squashes;
// If commit is currently squashing, then it will have activity for the
// next cycle. Set its next status as active.
@ -536,8 +524,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
commitStatus[tid] = ROBSquashing;
cpu->activityThisCycle();
++squashCounter;
}
template <class Impl>
@ -555,8 +541,6 @@ DefaultCommit<Impl>::squashFromXC(unsigned tid)
cpu->activityThisCycle();
xcSquash[tid] = false;
++squashCounter;
}
template <class Impl>
@ -571,6 +555,9 @@ DefaultCommit<Impl>::tick()
return;
}
if ((*activeThreads).size() <=0)
return;
list<unsigned>::iterator threads = (*activeThreads).begin();
// Check if any of the threads are done squashing. Change the
@ -582,10 +569,12 @@ DefaultCommit<Impl>::tick()
if (rob->isDoneSquashing(tid)) {
commitStatus[tid] = Running;
--squashCounter;
} else {
DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
"insts this cycle.\n", tid);
" insts this cycle.\n", tid);
rob->doSquash(tid);
toIEW->commitInfo[tid].robSquashing = true;
wroteToTimeBuffer = true;
}
}
}
@ -691,29 +680,7 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) {
unsigned tid = *threads++;
/*
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
// Record the fault. Wait until it's empty in the ROB.
// Then handle the trap. Ignore it if there's already a
// trap pending as fetch will be redirected.
fetchFault = fromFetch->fetchFault;
fetchFaultTick = curTick + fetchTrapLatency;
commitStatus[0] = FetchTrapPending;
DPRINTF(Commit, "Fault from fetch recorded. Will trap if the "
"ROB empties without squashing the fault.\n");
fetchTrapWait = 0;
}
// Fetch may tell commit to clear the trap if it's been squashed.
if (fromFetch->clearFetchFault) {
DPRINTF(Commit, "Received clear fetch fault signal\n");
fetchTrapWait = 0;
if (commitStatus[0] == FetchTrapPending) {
DPRINTF(Commit, "Clearing fault from fetch\n");
commitStatus[0] = Running;
}
}
*/
// Not sure which one takes priority. I think if we have
// both, that's a bad sign.
if (trapSquash[tid] == true) {
@ -741,8 +708,6 @@ DefaultCommit<Impl>::commit()
commitStatus[tid] = ROBSquashing;
++squashCounter;
// If we want to include the squashing instruction in the squash,
// then use one older sequence number.
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
@ -944,7 +909,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
thread[tid]->funcExeInst--;
head_inst->reachedCommit = true;
head_inst->setAtCommit();
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
@ -1060,7 +1025,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
#else // !FULL_SYSTEM
panic("fault (%d) detected @ PC %08p", inst_fault,
@ -1083,6 +1048,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
if (head_inst->isCopy())
panic("Should not commit any copy instructions!");
// Finally clear the head ROB entry.
rob->retireHead(tid);

View file

@ -108,12 +108,14 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
// For now just have these time buffers be pretty big.
// @todo: Make these time buffer sizes parameters or derived
// from latencies
timeBuffer(5, 5),
fetchQueue(5, 5),
decodeQueue(5, 5),
renameQueue(5, 5),
iewQueue(5, 5),
activityRec(NumStages, 10, params->activity),
timeBuffer(params->backComSize, params->forwardComSize),
fetchQueue(params->backComSize, params->forwardComSize),
decodeQueue(params->backComSize, params->forwardComSize),
renameQueue(params->backComSize, params->forwardComSize),
iewQueue(params->backComSize, params->forwardComSize),
activityRec(NumStages,
params->backComSize + params->forwardComSize,
params->activity),
globalSeqNum(1),
@ -180,7 +182,6 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
commit.setIEWQueue(&iewQueue);
commit.setRenameQueue(&renameQueue);
commit.setFetchStage(&fetch);
commit.setIEWStage(&iew);
rename.setIEWStage(&iew);
rename.setCommitStage(&commit);
@ -709,7 +710,7 @@ void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
// Flush out any old data from the time buffers.
for (int i = 0; i < 10; ++i) {
for (int i = 0; i < timeBuffer.getSize(); ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
@ -758,6 +759,46 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
/*
template <class Impl>
void
FullO3CPU<Impl>::serialize(std::ostream &os)
{
BaseCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
// Use SimpleThread's ability to checkpoint to make it easier to
// write out the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static SimpleThread temp;
for (int i = 0; i < thread.size(); i++) {
nameOut(os, csprintf("%s.xc.%i", name(), i));
temp.copyXC(thread[i]->getXC());
temp.serialize(os);
}
}
template <class Impl>
void
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
BaseCPU::unserialize(cp, section);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
// Use SimpleThread's ability to checkpoint to make it easier to
// read in the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static SimpleThread temp;
for (int i = 0; i < thread.size(); i++) {
temp.copyXC(thread[i]->getXC());
temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
thread[i]->getXC()->copyArchRegs(temp.getXC());
}
}
*/
template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)
@ -866,7 +907,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegSingle(phys_reg, val);
}
@ -875,7 +917,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegDouble(phys_reg, val);
}
@ -884,7 +927,8 @@ template <class Impl>
void
FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
regFile.setFloatRegInt(phys_reg, val);
}

View file

@ -63,6 +63,12 @@ class BaseFullCPU : public BaseCPU
void regStats();
/** Sets this CPU's ID. */
void setCpuId(int id) { cpu_id = id; }
/** Reads this CPU's ID. */
int readCpuId() { return cpu_id; }
protected:
int cpu_id;
};

View file

@ -278,7 +278,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
@ -294,7 +294,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid &&
fromFetch->insts[i]->seqNum > inst->seqNum) {
fromFetch->insts[i]->squashed = true;
fromFetch->insts[i]->setSquashed();
}
}
@ -343,7 +343,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid) {
fromFetch->insts[i]->squashed = true;
fromFetch->insts[i]->setSquashed();
squash_count++;
}
}
@ -721,9 +721,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
inst->setNextPC(inst->branchTarget());
if (inst->mispredicted()) {
if (inst->branchTarget() != inst->readPredTarg()) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do

View file

@ -358,6 +358,12 @@ class DefaultFetch
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
/** The PC of the cacheline that has been loaded. */
Addr cacheDataPC[Impl::MaxThreads];
/** Whether or not the cache data is valid. */
bool cacheDataValid[Impl::MaxThreads];
/** Size of instructions. */
int instSize;

View file

@ -138,6 +138,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
cacheDataPC[tid] = 0;
cacheDataValid[tid] = false;
stalls[tid].decode = 0;
stalls[tid].rename = 0;
@ -334,6 +336,7 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Wake up the CPU (if it went to sleep and was waiting on this completion
// event).
cpu->wakeCPU();
cacheDataValid[tid] = true;
DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
tid);
@ -466,7 +469,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
if (interruptPending && flags == 0 || switchedOut) {
if (interruptPending && flags == 0) {
// Hold off fetch from getting new instructions while an interrupt
// is pending.
return false;
@ -475,6 +478,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Align the fetch PC so it's at the start of a cache block.
fetch_PC = icacheBlockAlignPC(fetch_PC);
// If we've already got the block, no need to try to fetch it again.
if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
return true;
}
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
memReq[tid] = new MemReq();
@ -525,6 +533,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
MemAccessResult result = icacheInterface->access(memReq[tid]);
cacheDataPC[tid] = fetch_PC;
cacheDataValid[tid] = false;
fetchedCacheLines++;
// If the cache missed, then schedule an event to wake
@ -1002,8 +1013,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
warn("%lli: Quiesce instruction encountered, halting fetch!",
curTick);
// warn("%lli: Quiesce instruction encountered, halting fetch!",
// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@ -1067,7 +1078,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
#endif // FULL_SYSTEM

View file

@ -224,6 +224,47 @@ class DefaultIEW
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
void incrWb(InstSeqNum &sn)
{
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding <= wbMax);
#ifdef DEBUG
wbList.insert(sn);
#endif
}
void decrWb(InstSeqNum &sn)
{
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding >= 0);
#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
#endif
}
#ifdef DEBUG
std::set<InstSeqNum> wbList;
void dumpWb()
{
std::set<InstSeqNum>::iterator wb_it = wbList.begin();
while (wb_it != wbList.end()) {
cprintf("[sn:%lli]\n",
(*wb_it));
wb_it++;
}
}
#endif
bool canIssue() { return ableToIssue; }
bool ableToIssue;
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
@ -281,6 +322,9 @@ class DefaultIEW
/** Processes inputs and changes state accordingly. */
void checkSignalsAndUpdate(unsigned tid);
/** Removes instructions from rename from a thread's instruction list. */
void emptyRenameInsts(unsigned tid);
/** Sorts instructions coming from rename into lists separated by thread. */
void sortInsts();
@ -401,20 +445,12 @@ class DefaultIEW
*/
unsigned issueToExecuteDelay;
/** Width of issue's read path, in instructions. The read path is both
* the skid buffer and the rename instruction queue.
* Note to self: is this really different than issueWidth?
*/
unsigned issueReadWidth;
/** Width of dispatch, in instructions. */
unsigned dispatchWidth;
/** Width of issue, in instructions. */
unsigned issueWidth;
/** Width of execute, in instructions. Might make more sense to break
* down into FP vs int.
*/
unsigned executeWidth;
/** Index into queue of instructions being written back. */
unsigned wbNumInst;
@ -425,6 +461,17 @@ class DefaultIEW
*/
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
/** Writeback width * writeback depth, where writeback depth is
* the number of cycles of writing back instructions that can be
* buffered. */
unsigned wbMax;
/** Number of active threads. */
unsigned numThreads;
@ -459,14 +506,6 @@ class DefaultIEW
Stats::Scalar<> iewIQFullEvents;
/** Stat for number of times the LSQ becomes full. */
Stats::Scalar<> iewLSQFullEvents;
/** Stat for total number of executed instructions. */
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Stat for total number of memory ordering violation events. */
Stats::Scalar<> memOrderViolationEvents;
/** Stat for total number of incorrect predicted taken branches. */
@ -476,28 +515,27 @@ class DefaultIEW
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
/** Stat for total number of executed instructions. */
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */
Stats::Vector<> exeSwp;
Stats::Vector<> iewExecutedSwp;
/** Number of executed nops. */
Stats::Vector<> exeNop;
Stats::Vector<> iewExecutedNop;
/** Number of executed meomory references. */
Stats::Vector<> exeRefs;
Stats::Vector<> iewExecutedRefs;
/** Number of executed branches. */
Stats::Vector<> exeBranches;
// Stats::Vector<> issued_ops;
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
*/
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
Stats::Vector<> iewExecutedBranches;
/** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
// Stats::Formula issue_op_rate;
// Stats::Formula fu_busy_rate;
/** Number of instructions executed per cycle. */
Stats::Formula iewExecRate;
/** Number of instructions sent to commit. */
Stats::Vector<> iewInstsToCommit;
/** Number of instructions that writeback. */
@ -510,7 +548,6 @@ class DefaultIEW
* to resource contention.
*/
Stats::Vector<> wbPenalized;
/** Number of instructions per cycle written back. */
Stats::Formula wbRate;
/** Average number of woken instructions per writeback. */

View file

@ -56,9 +56,11 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
if (iewStage->isSwitchedOut()) {
iewStage->decrWb(inst->seqNum);
inst = NULL;
return;
} else if (inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
iewStage->wakeCPU();
inst = NULL;
return;
@ -93,16 +95,17 @@ DefaultIEW<Impl>::LdWritebackEvent::description()
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: // @todo: Make this into a parameter.
issueToExecQueue(5, 5),
issueToExecQueue(params->backComSize, params->forwardComSize),
instQueue(params),
ldstQueue(params),
fuPool(params->fuPool),
commitToIEWDelay(params->commitToIEWDelay),
renameToIEWDelay(params->renameToIEWDelay),
issueToExecuteDelay(params->issueToExecuteDelay),
issueReadWidth(params->issueWidth),
dispatchWidth(params->dispatchWidth),
issueWidth(params->issueWidth),
executeWidth(params->executeWidth),
wbOutstanding(0),
wbWidth(params->wbWidth),
numThreads(params->numberOfThreads),
switchedOut(false)
{
@ -125,8 +128,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
fetchRedirect[i] = false;
}
wbMax = wbWidth * params->wbDepth;
updateLSQNextCycle = false;
ableToIssue = true;
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
}
@ -144,6 +151,7 @@ DefaultIEW<Impl>::regStats()
using namespace Stats;
instQueue.regStats();
ldstQueue.regStats();
iewIdleCycles
.name(name() + ".iewIdleCycles")
@ -189,20 +197,6 @@ DefaultIEW<Impl>::regStats()
.name(name() + ".iewLSQFullEvents")
.desc("Number of times the LSQ has become full, causing a stall");
iewExecutedInsts
.name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
memOrderViolationEvents
.name(name() + ".memOrderViolationEvents")
.desc("Number of memory order violations");
@ -221,47 +215,49 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
exeSwp
iewExecutedInsts
.name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
iewExecutedSwp
.init(cpu->number_of_threads)
.name(name() + ".EXEC:swp")
.desc("number of swp insts executed")
.flags(total)
;
.flags(total);
exeNop
iewExecutedNop
.init(cpu->number_of_threads)
.name(name() + ".EXEC:nop")
.desc("number of nop insts executed")
.flags(total)
;
.flags(total);
exeRefs
iewExecutedRefs
.init(cpu->number_of_threads)
.name(name() + ".EXEC:refs")
.desc("number of memory reference insts executed")
.flags(total)
;
.flags(total);
exeBranches
iewExecutedBranches
.init(cpu->number_of_threads)
.name(name() + ".EXEC:branches")
.desc("Number of branches executed")
.flags(total)
;
issueRate
.name(name() + ".EXEC:rate")
.desc("Inst execution rate")
.flags(total)
;
issueRate = iewExecutedInsts / cpu->numCycles;
.flags(total);
iewExecStoreInsts
.name(name() + ".EXEC:stores")
.desc("Number of stores executed")
.flags(total)
;
iewExecStoreInsts = exeRefs - iewExecLoadInsts;
.flags(total);
iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
@ -277,58 +273,50 @@ DefaultIEW<Impl>::regStats()
.init(cpu->number_of_threads)
.name(name() + ".WB:sent")
.desc("cumulative count of insts sent to commit")
.flags(total)
;
.flags(total);
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
.flags(total);
producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
.flags(total);
consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
.flags(total);
wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
.flags(total)
;
.flags(total);
wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
.flags(total);
wbPenalizedRate = wbPenalized / writebackCount;
wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
.flags(total);
wbFanout = producerInst / consumerInst;
wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
.flags(total);
wbRate = writebackCount / cpu->numCycles;
}
@ -481,8 +469,7 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
// @todo: Fix hardcoded number
for (int i = 0; i < 6; ++i) {
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
}
@ -515,16 +502,7 @@ DefaultIEW<Impl>::squash(unsigned tid)
skidBuffer[tid].pop();
}
while (!insts[tid].empty()) {
if (insts[tid].front()->isLoad() ||
insts[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
}
toRename->iewInfo[tid].dispatched++;
insts[tid].pop();
}
emptyRenameInsts(tid);
}
template<class Impl>
@ -650,14 +628,16 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
// free slot.
while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++wbNumInst;
if (wbNumInst == issueWidth) {
if (wbNumInst == wbWidth) {
++wbCycle;
wbNumInst = 0;
}
assert(wbCycle < 5);
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@ -670,7 +650,7 @@ DefaultIEW<Impl>::validInstsFromRename()
unsigned inst_count = 0;
for (int i=0; i<fromRename->size; i++) {
if (!fromRename->insts[i]->squashed)
if (!fromRename->insts[i]->isSquashed())
inst_count++;
}
@ -858,10 +838,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
}
if (fromCommit->commitInfo[tid].robSquashing) {
DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n");
DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid);
dispatchStatus[tid] = Squashing;
emptyRenameInsts(tid);
wroteToTimeBuffer = true;
return;
}
@ -910,6 +892,22 @@ DefaultIEW<Impl>::sortInsts()
}
}
template <class Impl>
void
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
{
while (!insts[tid].empty()) {
if (insts[tid].front()->isLoad() ||
insts[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
}
toRename->iewInfo[tid].dispatched++;
insts[tid].pop();
}
}
template <class Impl>
void
DefaultIEW<Impl>::wakeCPU()
@ -1010,7 +1008,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
// Loop through the instructions, putting them in the instruction
// queue.
for ( ; dis_num_inst < insts_to_add &&
dis_num_inst < issueReadWidth;
dis_num_inst < dispatchWidth;
++dis_num_inst)
{
inst = insts_to_dispatch.front();
@ -1149,7 +1147,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
instQueue.recordProducer(inst);
exeNop[tid]++;
iewExecutedNop[tid]++;
add_to_iq = false;
} else if (inst->isExecuted()) {
@ -1263,6 +1261,7 @@ DefaultIEW<Impl>::executeInsts()
++iewExecSquashedInsts;
decrWb(inst->seqNum);
continue;
}
@ -1399,8 +1398,8 @@ DefaultIEW<Impl>::writebackInsts()
DynInstPtr inst = toCommit->insts[inst_num];
int tid = inst->threadNumber;
DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
inst->readPC());
DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
iewInstsToCommit[tid]++;
@ -1425,6 +1424,8 @@ DefaultIEW<Impl>::writebackInsts()
}
writebackCount[tid]++;
}
decrWb(inst->seqNum);
}
}
@ -1561,7 +1562,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
exeSwp[thread_number]++;
iewExecutedSwp[thread_number]++;
else
iewExecutedInsts++;
#else
@ -1572,13 +1573,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
// Control operations
//
if (inst->isControl())
exeBranches[thread_number]++;
iewExecutedBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
exeRefs[thread_number]++;
iewExecutedRefs[thread_number]++;
if (inst->isLoad()) {
iewExecLoadInsts[thread_number]++;

View file

@ -490,8 +490,6 @@ class InstructionQueue
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
// Stats::Formula issue_stores;
// Stats::Formula issue_op_rate;
/** Number of times the FU was busy. */
Stats::Vector<> fuBusy;
/** Number of times the FU was busy per instruction issued. */

View file

@ -288,22 +288,7 @@ InstructionQueue<Impl>::regStats()
.flags(total)
;
issueRate = iqInstsIssued / cpu->numCycles;
/*
issue_stores
.name(name() + ".ISSUE:stores")
.desc("Number of stores issued")
.flags(total)
;
issue_stores = exe_refs - exe_loads;
*/
/*
issue_op_rate
.name(name() + ".ISSUE:op_rate")
.desc("Operation issue rate")
.flags(total)
;
issue_op_rate = issued_ops / numCycles;
*/
statFuBusy
.init(Num_OpClasses)
.name(name() + ".ISSUE:fu_full")
@ -700,6 +685,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int total_issued = 0;
while (total_issued < totalWidth &&
iewStage->canIssue() &&
order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
@ -790,13 +776,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// complete.
++freeEntries;
count[tid]--;
issuing_inst->removeInIQ();
issuing_inst->clearInIQ();
} else {
memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
statIssuedInstType[tid][op_class]++;
iewStage->incrWb(issuing_inst->seqNum);
} else {
statFuBusy[op_class]++;
fuBusy[tid]++;
@ -1096,7 +1083,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
// inst will flow through the rest of the pipeline.
squashed_inst->setIssued();
squashed_inst->setCanCommit();
squashed_inst->removeInIQ();
squashed_inst->clearInIQ();
//Update Thread IQ Count
count[squashed_inst->threadNumber]--;

View file

@ -62,6 +62,9 @@ class LSQ {
/** Returns the name of the LSQ. */
std::string name() const;
/** Registers the statistics for each LSQ Unit. */
void regStats();
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
/** Sets the CPU pointer. */

View file

@ -104,6 +104,16 @@ LSQ<Impl>::name() const
return iewStage->name() + ".lsq";
}
template<class Impl>
void
LSQ<Impl>::regStats()
{
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
thread[tid].regStats();
}
}
template<class Impl>
void
LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)

View file

@ -101,6 +101,9 @@ class LSQUnit {
/** Returns the name of the LSQ unit. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
@ -153,9 +156,6 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
// @todo: Include stats in the LSQ unit.
//void regStats();
/** Clears all the entries in the LQ. */
void clearLQ();
@ -369,25 +369,34 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
/*
// total number of loads forwaded from LSQ stores
Stats::Vector<> lsq_forw_loads;
/** Total number of loads forwaded from LSQ stores. */
Stats::Scalar<> lsqForwLoads;
// total number of loads ignored due to invalid addresses
Stats::Vector<> inv_addr_loads;
/** Total number of loads ignored due to invalid addresses. */
Stats::Scalar<> invAddrLoads;
// total number of software prefetches ignored due to invalid addresses
Stats::Vector<> inv_addr_swpfs;
/** Total number of squashed loads. */
Stats::Scalar<> lsqSquashedLoads;
// total non-speculative bogus addresses seen (debug var)
Counter sim_invalid_addrs;
Stats::Vector<> fu_busy; //cumulative fu busy
/** Total number of responses from the memory system that are
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
// ready loads blocked due to memory disambiguation
Stats::Vector<> lsq_blocked_loads;
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;
/** Total number of software prefetches ignored due to invalid addresses. */
Stats::Scalar<> invAddrSwpfs;
/** Ready loads blocked due to partial store-forwarding. */
Stats::Scalar<> lsqBlockedLoads;
/** Number of loads that were rescheduled. */
Stats::Scalar<> lsqRescheduledLoads;
/** Number of times the LSQ is blocked due to the cache. */
Stats::Scalar<> lsqCacheBlocked;
Stats::Scalar<> lsqInversion;
*/
public:
/** Executes the load at the given index. */
template <class T>
@ -441,8 +450,9 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
if (req->flags & UNCACHEABLE &&
(load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
(load_idx != loadHead || !loadQueue[load_idx]->isAtCommit())) {
iewStage->rescheduleMemInst(loadQueue[load_idx]);
++lsqRescheduledLoads;
return TheISA::genMachineCheckFault();
}
@ -552,6 +562,8 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(loadQueue[load_idx]);
iewStage->decrWb(loadQueue[load_idx]->seqNum);
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
@ -559,6 +571,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->vaddr);
++lsqBlockedLoads;
return NoFault;
}
}
@ -579,6 +592,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// if we have a cache, do cache access too
if (fault == NoFault && dcacheInterface) {
if (dcacheInterface->isBlocked()) {
++lsqCacheBlocked;
iewStage->decrWb(inst->seqNum);
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;

View file

@ -124,6 +124,47 @@ LSQUnit<Impl>::name() const
}
}
template<class Impl>
void
LSQUnit<Impl>::regStats()
{
lsqForwLoads
.name(name() + ".forwLoads")
.desc("Number of loads that had data forwarded from stores");
invAddrLoads
.name(name() + ".invAddrLoads")
.desc("Number of loads ignored due to an invalid address");
lsqSquashedLoads
.name(name() + ".squashedLoads")
.desc("Number of loads squashed");
lsqIgnoredResponses
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
invAddrSwpfs
.name(name() + ".invAddrSwpfs")
.desc("Number of software prefetches ignored due to an invalid address");
lsqBlockedLoads
.name(name() + ".blockedLoads")
.desc("Number of blocked loads due to partial load-store forwarding");
lsqRescheduledLoads
.name(name() + ".rescheduledLoads")
.desc("Number of loads that were rescheduled");
lsqCacheBlocked
.name(name() + ".cacheBlocked")
.desc("Number of times an access to memory failed due to the cache being blocked");
}
template<class Impl>
void
LSQUnit<Impl>::clearLQ()
@ -548,6 +589,7 @@ LSQUnit<Impl>::writebackStores()
if (dcacheInterface && dcacheInterface->isBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
++lsqCacheBlocked;
break;
}
@ -705,7 +747,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->squashed = true;
loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
--loads;
@ -748,7 +790,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
// Clear the smart pointer to make sure it is decremented.
storeQueue[store_idx].inst->squashed = true;
storeQueue[store_idx].inst->setSquashed();
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
@ -765,6 +807,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeTail = store_idx;
decrStIdx(store_idx);
++lsqSquashedStores;
}
}

View file

@ -35,6 +35,7 @@
// AlphaSimpleImpl.
template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
#ifdef DEBUG
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
@ -44,3 +45,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
#endif

View file

@ -59,7 +59,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
}
}
#ifdef DEBUG
assert(MemDepEntry::memdep_count == 0);
#endif
}
template <class MemDepPred, class Impl>
@ -141,7 +143,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
#ifdef DEBUG
MemDepEntry::memdep_insert++;
#endif
instList[tid].push_back(inst);
@ -227,7 +231,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
// Insert the MemDepEntry into the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
#ifdef DEBUG
MemDepEntry::memdep_insert++;
#endif
// Add the instruction to the list.
instList[tid].push_back(inst);
@ -275,7 +281,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
#ifdef DEBUG
MemDepEntry::memdep_insert++;
#endif
// Add the instruction to the instruction list.
instList[tid].push_back(barr_inst);
@ -375,7 +383,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
#ifdef DEBUG
MemDepEntry::memdep_erase++;
#endif
}
template <class MemDepPred, class Impl>
@ -470,7 +480,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
#ifdef DEBUG
MemDepEntry::memdep_erase++;
#endif
instList[tid].erase(squash_it--);
}
@ -551,5 +563,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists()
cprintf("Memory dependence hash size: %i\n", memDepHash.size());
#ifdef DEBUG
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
#endif
}

View file

@ -223,10 +223,10 @@ class PhysRegFile
public:
/** (signed) integer register file. */
std::vector<IntReg> intRegFile;
IntReg *intRegFile;
/** Floating point register file. */
std::vector<FloatReg> floatRegFile;
FloatReg *floatRegFile;
/** Miscellaneous register file. */
MiscRegFile miscRegs[Impl::MaxThreads];
@ -256,11 +256,15 @@ PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
: numPhysicalIntRegs(_numPhysicalIntRegs),
numPhysicalFloatRegs(_numPhysicalFloatRegs)
{
intRegFile.resize(numPhysicalIntRegs);
floatRegFile.resize(numPhysicalFloatRegs);
intRegFile = new IntReg[numPhysicalIntRegs];
floatRegFile = new FloatReg[numPhysicalFloatRegs];
//memset(intRegFile, 0, sizeof(*intRegFile));
//memset(floatRegFile, 0, sizeof(*floatRegFile));
for (int i = 0; i < Impl::MaxThreads; ++i) {
miscRegs[i].clear();
}
memset(intRegFile, 0, sizeof(*intRegFile));
memset(floatRegFile, 0, sizeof(*floatRegFile));
}
#endif

View file

@ -348,7 +348,7 @@ DefaultRename<Impl>::squash(unsigned tid)
for (int i=0; i<fromDecode->size; i++) {
if (fromDecode->insts[i]->threadNumber == tid) {
fromDecode->insts[i]->squashed = true;
fromDecode->insts[i]->setSquashed();
wroteToTimeBuffer = true;
squashCount++;
}
@ -1029,7 +1029,7 @@ DefaultRename<Impl>::validInsts()
unsigned inst_count = 0;
for (int i=0; i<fromDecode->size; i++) {
if (!fromDecode->insts[i]->squashed)
if (!fromDecode->insts[i]->isSquashed())
inst_count++;
}

View file

@ -305,7 +305,7 @@ class ROB
private:
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum;
InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[Impl::MaxThreads];

View file

@ -38,10 +38,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
: numEntries(_numEntries),
squashWidth(_squashWidth),
numInstsInROB(0),
squashedSeqNum(0),
numThreads(_numThreads)
{
for (int tid=0; tid < numThreads; tid++) {
squashedSeqNum[tid] = 0;
doneSquashing[tid] = true;
threadEntries[tid] = 0;
}
@ -274,7 +274,7 @@ ROB<Impl>::retireHead(unsigned tid)
--numInstsInROB;
--threadEntries[tid];
head_inst->removeInROB();
head_inst->clearInROB();
head_inst->setCommitted();
instList[tid].erase(head_it);
@ -349,11 +349,11 @@ void
ROB<Impl>::doSquash(unsigned tid)
{
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
tid, squashedSeqNum);
tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
if ((*squashIt[tid])->seqNum < squashedSeqNum) {
if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@ -368,7 +368,7 @@ ROB<Impl>::doSquash(unsigned tid)
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
(*squashIt[tid])->seqNum > squashedSeqNum;
(*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
@ -405,7 +405,7 @@ ROB<Impl>::doSquash(unsigned tid)
// Check if ROB is done squashing.
if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
@ -517,7 +517,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
doneSquashing[tid] = false;
squashedSeqNum = squash_num;
squashedSeqNum[tid] = squash_num;
if (!instList[tid].empty()) {
InstIt tail_thread = instList[tid].end();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -79,13 +79,13 @@ template <class>
class Checker;
/**
* Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
* simple out-of-order capabilities added to it. It is still a 1 CPI machine
* (?), but is capable of handling cache misses. Basically it models having
* a ROB/IQ by only allowing a certain amount of instructions to execute while
* the cache miss is outstanding.
* Light weight out of order CPU model that approximates an out of
* order CPU. It is separated into a front end and a back end, with
* the template parameter Impl describing the classes used for each.
* The goal is to be able to specify through the Impl the class to use
* for the front end and back end, with different classes used to
* model different levels of detail.
*/
template <class Impl>
class OzoneCPU : public BaseCPU
{
@ -98,6 +98,11 @@ class OzoneCPU : public BaseCPU
typedef TheISA::MiscReg MiscReg;
public:
/**
* The ExecContext for this CPU, which is used to provide the
* CPU's interface to any external objects. Internally most of
* the CPU state is stored within the OzoneThreadState class.
*/
class OzoneXC : public ExecContext {
public:
OzoneCPU<Impl> *cpu;
@ -235,14 +240,19 @@ class OzoneCPU : public BaseCPU
#endif
};
// execution context proxy
// ExecContext for OzoneCPU
OzoneXC ozoneXC;
// ExecContext pointer that will be given to any external objects.
ExecContext *xcProxy;
// ExecContext pointer to the CheckerCPU's ExecContext.
ExecContext *checkerXC;
typedef OzoneThreadState<Impl> ImplState;
private:
// Committed thread state for the OzoneCPU.
OzoneThreadState<Impl> thread;
public:
@ -280,12 +290,6 @@ class OzoneCPU : public BaseCPU
tickEvent.squash();
}
private:
Trace::InstRecord *traceData;
template<typename T>
void trace_data(T data);
public:
enum Status {
Running,
@ -361,6 +365,7 @@ class OzoneCPU : public BaseCPU
FrontEnd *frontEnd;
BackEnd *backEnd;
private:
Status status() const { return _status; }
void setStatus(Status new_status) { _status = new_status; }
@ -392,12 +397,11 @@ class OzoneCPU : public BaseCPU
// number of idle cycles
Stats::Average<> notIdleFraction;
Stats::Formula idleFraction;
public:
public:
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
#if FULL_SYSTEM
bool validInstAddr(Addr addr) { return true; }
bool validDataAddr(Addr addr) { return true; }
@ -585,12 +589,9 @@ class OzoneCPU : public BaseCPU
Fault copy(Addr dest);
InstSeqNum globalSeqNum;
public:
void squashFromXC();
// @todo: This can be a useful debug function. Implement it.
void dumpInsts() { frontEnd->dumpInsts(); }
#if FULL_SYSTEM
@ -608,7 +609,6 @@ class OzoneCPU : public BaseCPU
ExecContext *xcBase() { return xcProxy; }
bool decoupledFrontEnd;
struct CommStruct {
InstSeqNum doneSeqNum;
InstSeqNum nonSpecSeqNum;
@ -617,8 +617,13 @@ class OzoneCPU : public BaseCPU
bool stall;
};
InstSeqNum globalSeqNum;
TimeBuffer<CommStruct> comm;
bool decoupledFrontEnd;
bool lockFlag;
Stats::Scalar<> quiesceCycles;

View file

@ -26,9 +26,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//#include <cstdio>
//#include <cstdlib>
#include "arch/isa_traits.hh" // For MachInst
#include "base/trace.hh"
#include "config/full_system.hh"
@ -39,7 +36,6 @@
#include "cpu/ozone/cpu.hh"
#include "cpu/quiesce_event.hh"
#include "cpu/static_inst.hh"
//#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "sim/sim_object.hh"
#include "sim/stats.hh"
@ -50,7 +46,6 @@
#include "arch/alpha/tlb.hh"
#include "arch/vtophys.hh"
#include "base/callback.hh"
//#include "base/remote_gdb.hh"
#include "cpu/profile.hh"
#include "kern/kernel_stats.hh"
#include "mem/functional/memory_control.hh"
@ -66,15 +61,6 @@
using namespace TheISA;
template <class Impl>
template<typename T>
void
OzoneCPU<Impl>::trace_data(T data) {
if (traceData) {
traceData->setData(data);
}
}
template <class Impl>
OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
: Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
@ -104,7 +90,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
: BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
mem(p->workload[0]->getMemory()),
#endif
comm(5, 5)
comm(5, 5), decoupledFrontEnd(p->decoupledFrontEnd)
{
frontEnd = new FrontEnd(p);
backEnd = new BackEnd(p);
@ -112,6 +98,9 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
_status = Idle;
if (p->checker) {
// If checker is being used, get the checker from the params
// pointer, make the Checker's ExecContext, and setup the
// xcProxy to point to it.
BaseCPU *temp_checker = p->checker;
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
checker->setMemory(mem);
@ -122,11 +111,17 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.xcProxy = checkerXC;
xcProxy = checkerXC;
} else {
// If checker is not being used, then the xcProxy points
// directly to the CPU's ExecContext.
checker = NULL;
thread.xcProxy = &ozoneXC;
xcProxy = &ozoneXC;
}
// Add xcProxy to CPU list of ExecContexts.
execContexts.push_back(xcProxy);
// Give the OzoneXC pointers to the CPU and the thread state.
ozoneXC.cpu = this;
ozoneXC.thread = &thread;
@ -134,7 +129,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setStatus(ExecContext::Suspended);
#if FULL_SYSTEM
/***** All thread state stuff *****/
// Setup thread state stuff.
thread.cpu = this;
thread.tid = 0;
thread.mem = p->mem;
@ -171,8 +166,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
numInst = 0;
startNumInst = 0;
execContexts.push_back(xcProxy);
// Give pointers to the front and back end to all things they may need.
frontEnd->setCPU(this);
backEnd->setCPU(this);
@ -188,12 +182,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
frontEnd->setBackEnd(backEnd);
backEnd->setFrontEnd(frontEnd);
decoupledFrontEnd = p->decoupledFrontEnd;
globalSeqNum = 1;
checkInterrupts = false;
lockFlag = 0;
// Setup rename table, initializing all values to ready.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
thread.renameTable[i]->setResultReady();
@ -206,8 +201,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
// pTable = p->pTable;
#endif
lockFlag = 0;
DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
}
@ -231,14 +224,20 @@ template <class Impl>
void
OzoneCPU<Impl>::signalSwitched()
{
// Only complete the switchout when both the front end and back
// end have signalled they are ready to switch.
if (++switchCount == 2) {
backEnd->doSwitchOut();
frontEnd->doSwitchOut();
if (checker)
checker->switchOut(sampler);
_status = SwitchedOut;
if (tickEvent.scheduled())
tickEvent.squash();
sampler->signalSwitched();
}
assert(switchCount <= 2);
@ -793,6 +792,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
thread->quiesceEvent->xc = this;
}
// Copy kernel stats pointer from old context.
thread->kernelStats = old_context->getKernelStats();
// storeCondFailures = 0;
cpu->lockFlag = false;
@ -814,7 +814,11 @@ OzoneCPU<Impl>::OzoneXC::regStats(const std::string &name)
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::serialize(std::ostream &os)
{ }
{
// Once serialization is added, serialize the quiesce event and
// kernel stats. Will need to make sure there aren't multiple
// things that serialize them.
}
template <class Impl>
void
@ -867,7 +871,6 @@ OzoneCPU<Impl>::OzoneXC::getThreadNum()
return thread->tid;
}
// Also somewhat obnoxious. Really only used for the TLB fault.
template <class Impl>
TheISA::MachInst
OzoneCPU<Impl>::OzoneXC::getInst()
@ -901,7 +904,7 @@ OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
// Need to copy the XC values into the current rename table,
// copy the misc regs.
thread->regs.miscRegs.copyMiscRegs(xc);
TheISA::copyMiscRegs(xc, this);
}
template <class Impl>

View file

@ -257,7 +257,7 @@ InorderBackEnd<Impl>::executeInsts()
}
inst->setExecuted();
inst->setCompleted();
inst->setResultReady();
inst->setCanCommit();
instList.pop_front();

View file

@ -848,13 +848,13 @@ template <class Impl>
void
InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
{
OpClass op_class = ready_inst->opClass();
// OpClass op_class = ready_inst->opClass();
readyInsts.push(ready_inst);
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
ready_inst->readPC(), op_class, ready_inst->seqNum);
ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
}
/*
template <class Impl>
@ -1175,11 +1175,11 @@ InstQueue<Impl>::addIfReady(DynInstPtr &inst)
return;
}
OpClass op_class = inst->opClass();
// OpClass op_class = inst->opClass();
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
inst->readPC(), op_class, inst->seqNum);
inst->readPC(), inst->opClass(), inst->seqNum);
readyInsts.push(inst);
}

View file

@ -369,37 +369,37 @@ class LWBackEnd
/* Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
*/
Stats::Vector<> rob_cap_events;
Stats::Vector<> rob_cap_inst_count;
Stats::Vector<> iq_cap_events;
Stats::Vector<> iq_cap_inst_count;
Stats::Vector<> robCapEvents;
Stats::Vector<> robCapInstCount;
Stats::Vector<> iqCapEvents;
Stats::Vector<> iqCapInstCount;
// total number of instructions executed
Stats::Vector<> exe_inst;
Stats::Vector<> exe_swp;
Stats::Vector<> exe_nop;
Stats::Vector<> exe_refs;
Stats::Vector<> exe_loads;
Stats::Vector<> exe_branches;
Stats::Vector<> exeInst;
Stats::Vector<> exeSwp;
Stats::Vector<> exeNop;
Stats::Vector<> exeRefs;
Stats::Vector<> exeLoads;
Stats::Vector<> exeBranches;
Stats::Vector<> issued_ops;
Stats::Vector<> issuedOps;
// total number of loads forwaded from LSQ stores
Stats::Vector<> lsq_forw_loads;
Stats::Vector<> lsqForwLoads;
// total number of loads ignored due to invalid addresses
Stats::Vector<> inv_addr_loads;
Stats::Vector<> invAddrLoads;
// total number of software prefetches ignored due to invalid addresses
Stats::Vector<> inv_addr_swpfs;
Stats::Vector<> invAddrSwpfs;
// ready loads blocked due to memory disambiguation
Stats::Vector<> lsq_blocked_loads;
Stats::Vector<> lsqBlockedLoads;
Stats::Scalar<> lsqInversion;
Stats::Vector<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist;
Stats::Vector<> nIssuedDist;
Stats::VectorDistribution<> issueDelayDist;
Stats::VectorDistribution<> queue_res_dist;
Stats::VectorDistribution<> queueResDist;
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
@ -417,37 +417,37 @@ class LWBackEnd
Stats::Formula commit_ipb;
Stats::Formula lsq_inv_rate;
*/
Stats::Vector<> writeback_count;
Stats::Vector<> producer_inst;
Stats::Vector<> consumer_inst;
Stats::Vector<> wb_penalized;
Stats::Vector<> writebackCount;
Stats::Vector<> producerInst;
Stats::Vector<> consumerInst;
Stats::Vector<> wbPenalized;
Stats::Formula wb_rate;
Stats::Formula wb_fanout;
Stats::Formula wb_penalized_rate;
Stats::Formula wbRate;
Stats::Formula wbFanout;
Stats::Formula wbPenalizedRate;
// total number of instructions committed
Stats::Vector<> stat_com_inst;
Stats::Vector<> stat_com_swp;
Stats::Vector<> stat_com_refs;
Stats::Vector<> stat_com_loads;
Stats::Vector<> stat_com_membars;
Stats::Vector<> stat_com_branches;
Stats::Vector<> statComInst;
Stats::Vector<> statComSwp;
Stats::Vector<> statComRefs;
Stats::Vector<> statComLoads;
Stats::Vector<> statComMembars;
Stats::Vector<> statComBranches;
Stats::Distribution<> n_committed_dist;
Stats::Distribution<> nCommittedDist;
Stats::Scalar<> commit_eligible_samples;
Stats::Vector<> commit_eligible;
Stats::Scalar<> commitEligibleSamples;
Stats::Vector<> commitEligible;
Stats::Vector<> squashedInsts;
Stats::Vector<> ROBSquashedInsts;
Stats::Scalar<> ROB_fcount;
Stats::Formula ROB_full_rate;
Stats::Scalar<> ROBFcount;
Stats::Formula ROBFullRate;
Stats::Vector<> ROB_count; // cumulative ROB occupancy
Stats::Formula ROB_occ_rate;
Stats::VectorDistribution<> ROB_occ_dist;
Stats::Vector<> ROBCount; // cumulative ROB occupancy
Stats::Formula ROBOccRate;
Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();

View file

@ -251,78 +251,77 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
rob_cap_events
robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
rob_cap_inst_count
robCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
iq_cap_events
iqCapEvents
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
iq_cap_inst_count
iqCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
exe_inst
exeInst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
exe_swp
exeSwp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
exe_nop
exeNop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
exe_refs
exeRefs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
exe_loads
exeLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
exe_branches
exeBranches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
issued_ops
issuedOps
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
@ -339,28 +338,28 @@ LWBackEnd<Impl>::regStats()
//
// Other stats
//
lsq_forw_loads
lsqForwLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
inv_addr_loads
invAddrLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
inv_addr_swpfs
invAddrSwpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
lsq_blocked_loads
lsqBlockedLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
@ -372,51 +371,51 @@ LWBackEnd<Impl>::regStats()
.desc("Number of times LSQ instruction issued early")
;
n_issued_dist
nIssuedDist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
issue_delay_dist
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
queue_res_dist
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
queue_res_dist.subname(i, opClassStrings[i]);
queueResDist.subname(i, opClassStrings[i]);
}
writeback_count
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
producer_inst
producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
consumer_inst
consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
wb_penalized
wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
@ -424,71 +423,71 @@ LWBackEnd<Impl>::regStats()
;
wb_penalized_rate
wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
wb_penalized_rate = wb_penalized / writeback_count;
wbPenalizedRate = wbPenalized / writebackCount;
wb_fanout
wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
wb_fanout = producer_inst / consumer_inst;
wbFanout = producerInst / consumerInst;
wb_rate
wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
wb_rate = writeback_count / cpu->numCycles;
wbRate = writebackCount / cpu->numCycles;
stat_com_inst
statComInst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
stat_com_swp
statComSwp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
stat_com_refs
statComRefs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
stat_com_loads
statComLoads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
stat_com_membars
statComMembars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
stat_com_branches
statComBranches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
n_committed_dist
nCommittedDist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
@ -508,14 +507,14 @@ LWBackEnd<Impl>::regStats()
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
commit_eligible
commitEligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
commit_eligible_samples
commitEligibleSamples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
@ -532,32 +531,32 @@ LWBackEnd<Impl>::regStats()
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
;
ROB_fcount
ROBFcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
ROB_count
ROBCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
ROB_full_rate
ROBFullRate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
ROB_full_rate = ROB_fcount / cpu->numCycles;
ROBFullRate = ROBFcount / cpu->numCycles;
ROB_occ_rate
ROBOccRate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
ROB_occ_rate = ROB_count / cpu->numCycles;
ROBOccRate = ROBCount / cpu->numCycles;
ROB_occ_dist
ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
@ -660,7 +659,7 @@ LWBackEnd<Impl>::tick()
return;
}
ROB_count[0]+= numInsts;
ROBCount[0]+= numInsts;
wbCycle = 0;
@ -980,8 +979,8 @@ LWBackEnd<Impl>::executeInsts()
}
}
issued_ops[0]+= num_executed;
n_issued_dist[num_executed]++;
issuedOps[0]+= num_executed;
nIssuedDist[num_executed]++;
}
template<class Impl>
@ -1002,13 +1001,13 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
producer_inst[0]++;
consumer_inst[0]+= dependents;
producerInst[0]++;
consumerInst[0]+= dependents;
}
}
}
writeback_count[0]++;
writebackCount[0]++;
}
#if 0
template <class Impl>
@ -1076,7 +1075,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
thread->setPC(inst->readPC());
thread->setNextPC(inst->readNextPC());
inst->reachedCommit = true;
inst->setAtCommit();
// If the instruction is not executed yet, then it is a non-speculative
// or store inst. Signal backwards that it should be executed.
@ -1229,6 +1228,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
inst->traceData = NULL;
}
if (inst->isCopy())
panic("Should not commit any copy instructions!");
inst->clearDependents();
frontEnd->addFreeRegs(freed_regs);
@ -1292,7 +1294,7 @@ LWBackEnd<Impl>::commitInsts()
break;
}
}
n_committed_dist.sample(inst_num);
nCommittedDist.sample(inst_num);
}
template <class Impl>
@ -1344,7 +1346,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
(*insts_it)->setCanCommit();
(*insts_it)->removeInROB();
(*insts_it)->clearInROB();
for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
@ -1522,27 +1524,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
exe_swp[thread_number]++;
exeSwp[thread_number]++;
else
exe_inst[thread_number]++;
exeInst[thread_number]++;
#else
exe_inst[thread_number]++;
exeInst[thread_number]++;
#endif
//
// Control operations
//
if (inst->isControl())
exe_branches[thread_number]++;
exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
exe_refs[thread_number]++;
exeRefs[thread_number]++;
if (inst->isLoad())
exe_loads[thread_number]++;
exeLoads[thread_number]++;
}
}
@ -1562,33 +1564,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
stat_com_swp[tid]++;
statComSwp[tid]++;
} else {
stat_com_inst[tid]++;
statComInst[tid]++;
}
#else
stat_com_inst[tid]++;
statComInst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
stat_com_branches[tid]++;
statComBranches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
stat_com_refs[tid]++;
statComRefs[tid]++;
if (inst->isLoad()) {
stat_com_loads[tid]++;
statComLoads[tid]++;
}
}
if (inst->isMemBarrier()) {
stat_com_membars[tid]++;
statComMembars[tid]++;
}
}

View file

@ -447,7 +447,7 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// too).
// @todo: Fix uncached accesses.
if (req->flags & UNCACHEABLE &&
(inst != loadQueue.back() || !inst->reachedCommit)) {
(inst != loadQueue.back() || !inst->isAtCommit())) {
DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
"commit/LSQ!\n",
inst->seqNum);

View file

@ -182,8 +182,6 @@ struct OzoneThreadState : public ThreadState {
void setNextPC(uint64_t val)
{ nextPC = val; }
bool misspeculating() { return false; }
void setInst(TheISA::MachInst _inst) { inst = _inst; }
Counter readFuncExeInst() { return funcExeInst; }

View file

@ -60,6 +60,7 @@ struct ThreadState {
: cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
#endif
{
numInst = 0;
funcExeInst = 0;
storeCondFailures = 0;
}

View file

@ -39,12 +39,10 @@ class DerivAlphaFullCPU(BaseCPU):
"Issue/Execute/Writeback delay")
issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
"to the IEW stage)")
issueWidth = Param.Unsigned("Issue width")
executeWidth = Param.Unsigned("Execute width")
executeIntWidth = Param.Unsigned("Integer execute width")
executeFloatWidth = Param.Unsigned("Floating point execute width")
executeBranchWidth = Param.Unsigned("Branch execute width")
executeMemoryWidth = Param.Unsigned("Memory execute width")
dispatchWidth = Param.Unsigned(8, "Dispatch width")
issueWidth = Param.Unsigned(8, "Issue width")
wbWidth = Param.Unsigned(8, "Writeback width")
wbDepth = Param.Unsigned(1, "Writeback depth")
fuPool = Param.FUPool(NULL, "Functional Unit pool")
iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
@ -55,6 +53,9 @@ class DerivAlphaFullCPU(BaseCPU):
trapLatency = Param.Tick("Trap latency")
fetchTrapLatency = Param.Tick("Fetch trap latency")
backComSize = Param.Unsigned(5, "Time buffer size for backwards communication")
forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication")
predType = Param.String("Branch predictor type ('local', 'tournament')")
localPredictorSize = Param.Unsigned("Size of local predictor")
localCtrBits = Param.Unsigned("Bits per counter")