New stats added to O3 model.

--HG--
extra : convert_revision : 7abb491e89e3e1a331cd19aa05ddce5184abf9e0
This commit is contained in:
Kevin Lim 2006-04-24 17:06:00 -04:00
parent b14bf03219
commit 676afbe2c7
10 changed files with 555 additions and 76 deletions

View file

@ -369,6 +369,8 @@ class DefaultCommit
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
void updateComInstStats(DynInstPtr &inst);
/** Stat for the total number of committed instructions. */
Stats::Scalar<> commitCommittedInsts;
/** Stat for the total number of squashed instructions discarded by commit.
@ -383,15 +385,26 @@ class DefaultCommit
*/
Stats::Scalar<> commitNonSpecStalls;
/** Stat for the total number of committed branches. */
Stats::Scalar<> commitCommittedBranches;
// Stats::Scalar<> commitCommittedBranches;
/** Stat for the total number of committed loads. */
Stats::Scalar<> commitCommittedLoads;
// Stats::Scalar<> commitCommittedLoads;
/** Stat for the total number of committed memory references. */
Stats::Scalar<> commitCommittedMemRefs;
// Stats::Scalar<> commitCommittedMemRefs;
/** Stat for the total number of branch mispredicts that caused a squash. */
Stats::Scalar<> branchMispredicts;
/** Distribution of the number of committed instructions each cycle. */
Stats::Distribution<> numCommittedDist;
// total number of instructions committed
Stats::Vector<> stat_com_inst;
Stats::Vector<> stat_com_swp;
Stats::Vector<> stat_com_refs;
Stats::Vector<> stat_com_loads;
Stats::Vector<> stat_com_membars;
Stats::Vector<> stat_com_branches;
Stats::Scalar<> commit_eligible_samples;
Stats::Vector<> commit_eligible;
};
#endif // __CPU_O3_COMMIT_HH__

View file

@ -133,6 +133,7 @@ template <class Impl>
void
DefaultCommit<Impl>::regStats()
{
using namespace Stats;
commitCommittedInsts
.name(name() + ".commitCommittedInsts")
.desc("The number of committed instructions")
@ -150,6 +151,7 @@ DefaultCommit<Impl>::regStats()
.desc("The number of times commit has been forced to stall to "
"communicate backwards")
.prereq(commitNonSpecStalls);
/*
commitCommittedBranches
.name(name() + ".commitCommittedBranches")
.desc("The number of committed branches")
@ -162,6 +164,7 @@ DefaultCommit<Impl>::regStats()
.name(name() + ".commitCommittedMemRefs")
.desc("The number of committed memory references")
.prereq(commitCommittedMemRefs);
*/
branchMispredicts
.name(name() + ".branchMispredicts")
.desc("The number of times a branch was mispredicted")
@ -172,6 +175,73 @@ DefaultCommit<Impl>::regStats()
.desc("Number of insts commited each cycle")
.flags(Stats::pdf)
;
stat_com_inst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
stat_com_swp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
stat_com_refs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
stat_com_loads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
stat_com_membars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
stat_com_branches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
//
// Commit-Eligible instructions...
//
// -> The number of instructions eligible to commit in those
// cycles where we reached our commit BW limit (less the number
// actually committed)
//
// -> The average value is computed over ALL CYCLES... not just
// the BW limited cycles
//
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
commit_eligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
commit_eligible_samples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
}
template <class Impl>
@ -1060,9 +1130,7 @@ head_inst->isWriteBarrier())*/
return false;
}
if (head_inst->isControl()) {
++commitCommittedBranches;
}
updateComInstStats(head_inst);
// Now that the instruction is going to be committed, finalize its
// trace data.
@ -1186,6 +1254,47 @@ DefaultCommit<Impl>::robDoneSquashing()
return true;
}
template <class Impl>
void
DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
{
    // Every counter touched here is a per-thread vector stat, indexed by
    // the thread the committing instruction belongs to.
    const unsigned tid = inst->threadNumber;

    // Software prefetches are only distinguished on Alpha targets; there
    // they are tallied separately instead of as committed instructions.
#ifdef TARGET_ALPHA
    const bool sw_prefetch = inst->isDataPrefetch();
#else
    const bool sw_prefetch = false;
#endif

    if (sw_prefetch) {
        stat_com_swp[tid]++;
    } else {
        stat_com_inst[tid]++;
    }

    // Committed control instructions (branches).
    if (inst->isControl()) {
        stat_com_branches[tid]++;
    }

    // Committed memory references; loads are a subset of these, so the
    // load query is only made for instructions that are memory references.
    const bool mem_ref = inst->isMemRef();
    if (mem_ref) {
        stat_com_refs[tid]++;
    }
    if (mem_ref && inst->isLoad()) {
        stat_com_loads[tid]++;
    }

    // Committed memory barriers.
    if (inst->isMemBarrier()) {
        stat_com_membars[tid]++;
    }
}
////////////////////////////////////////
// //
// SMT COMMIT POLICY MAINTAINED HERE //

View file

@ -370,6 +370,7 @@ class DefaultFetch
Stats::Scalar<> icacheStallCycles;
/** Stat for total number of fetched instructions. */
Stats::Scalar<> fetchedInsts;
Stats::Scalar<> fetchedBranches;
/** Stat for total number of predicted branches. */
Stats::Scalar<> predictedBranches;
/** Stat for total number of cycles spent fetching. */
@ -383,6 +384,8 @@ class DefaultFetch
Stats::Scalar<> fetchBlockedCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
Stats::Formula idleRate;

View file

@ -178,6 +178,11 @@ DefaultFetch<Impl>::regStats()
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
.name(name() + ".fetchedBranches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
@ -209,6 +214,11 @@ DefaultFetch<Impl>::regStats()
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchIcacheSquashes
.name(name() + ".fetchIcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
fetchNisnDist
.init(/* base value */ 0,
/* last value */ fetchWidth,
@ -322,8 +332,10 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
if (fetchStatus[tid] != IcacheMissStall ||
req != memReq[tid])
req != memReq[tid]) {
++fetchIcacheSquashes;
return;
}
// Wake up the CPU (if it went to sleep and was waiting on this completion
// event).
@ -400,6 +412,8 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
++fetchedBranches;
if (predict_taken) {
++predictedBranches;
}
@ -457,6 +471,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// If translation was successful, attempt to read the first
// instruction.
if (fault == NoFault) {
#if FULL_SYSTEM
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) {
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path!",
@ -464,6 +479,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
ret_fault = TheISA::genMachineCheckFault();
return false;
}
#endif
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fault = cpu->mem->read(memReq[tid], cacheData[tid]);
@ -480,6 +496,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
MemAccessResult result = icacheInterface->access(memReq[tid]);
fetchedCacheLines++;
// If the cache missed, then schedule an event to wake
// up this stage once the cache miss completes.
// @todo: Possibly allow for longer than 1 cycle cache hits.
@ -499,8 +517,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
"read.\n", tid);
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
fetchedCacheLines++;
}
} else {
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
@ -889,10 +905,14 @@ DefaultFetch<Impl>::fetch(bool &status_change)
if (!fetch_success)
return;
} else {
if (fetchStatus[tid] == Blocked) {
if (fetchStatus[tid] == Idle) {
++fetchIdleCycles;
} else if (fetchStatus[tid] == Blocked) {
++fetchBlockedCycles;
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
} else if (fetchStatus[tid] == IcacheMissStall) {
++icacheStallCycles;
}
// Status is Idle, Squashing, Blocked, or IcacheMissStall, so
@ -904,6 +924,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// If we had a stall due to an icache miss, then return.
if (fetchStatus[tid] == IcacheMissStall) {
++icacheStallCycles;
status_change = true;
return;
}

View file

@ -278,6 +278,8 @@ class DefaultIEW
void tick();
private:
void updateExeInstStats(DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<TimeStruct> *timeBuffer;
@ -443,9 +445,9 @@ class DefaultIEW
/** Stat for total number of executed instructions. */
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Scalar<> iewExecLoadInsts;
Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
Stats::Scalar<> iewExecStoreInsts;
// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Stat for total number of memory ordering violation events. */
@ -456,6 +458,33 @@ class DefaultIEW
Stats::Scalar<> predictedNotTakenIncorrect;
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
Stats::Vector<> exe_swp;
Stats::Vector<> exe_nop;
Stats::Vector<> exe_refs;
Stats::Vector<> exe_branches;
// Stats::Vector<> issued_ops;
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
*/
Stats::Formula issue_rate;
Stats::Formula iewExecStoreInsts;
// Stats::Formula issue_op_rate;
// Stats::Formula fu_busy_rate;
Stats::Vector<> iewInstsToCommit;
Stats::Vector<> writeback_count;
Stats::Vector<> producer_inst;
Stats::Vector<> consumer_inst;
Stats::Vector<> wb_penalized;
Stats::Formula wb_rate;
Stats::Formula wb_fanout;
Stats::Formula wb_penalized_rate;
};
#endif // __CPU_O3_IEW_HH__

View file

@ -140,6 +140,8 @@ template <class Impl>
void
DefaultIEW<Impl>::regStats()
{
using namespace Stats;
instQueue.regStats();
//ldstQueue.regStats();
@ -195,13 +197,15 @@ DefaultIEW<Impl>::regStats()
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed");
.desc("Number of load instructions executed")
.flags(total);
/*
iewExecStoreInsts
.name(name() + ".iewExecStoreInsts")
.desc("Number of store instructions executed");
*/
iewExecSquashedInsts
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
@ -223,6 +227,116 @@ DefaultIEW<Impl>::regStats()
.desc("Number of branch mispredicts detected at execute");
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
exe_swp
.init(cpu->number_of_threads)
.name(name() + ".EXEC:swp")
.desc("number of swp insts executed")
.flags(total)
;
exe_nop
.init(cpu->number_of_threads)
.name(name() + ".EXEC:nop")
.desc("number of nop insts executed")
.flags(total)
;
exe_refs
.init(cpu->number_of_threads)
.name(name() + ".EXEC:refs")
.desc("number of memory reference insts executed")
.flags(total)
;
exe_branches
.init(cpu->number_of_threads)
.name(name() + ".EXEC:branches")
.desc("Number of branches executed")
.flags(total)
;
issue_rate
.name(name() + ".EXEC:rate")
.desc("Inst execution rate")
.flags(total)
;
issue_rate = iewExecutedInsts / cpu->numCycles;
iewExecStoreInsts
.name(name() + ".EXEC:stores")
.desc("Number of stores executed")
.flags(total)
;
iewExecStoreInsts = exe_refs - iewExecLoadInsts;
/*
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
subname << opClassStrings[i] << "_delay";
issue_delay_dist.subname(i, subname.str());
}
*/
//
// Other stats
//
iewInstsToCommit
.init(cpu->number_of_threads)
.name(name() + ".WB:sent")
.desc("cumulative count of insts sent to commit")
.flags(total)
;
writeback_count
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
producer_inst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
consumer_inst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
wb_penalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
.flags(total)
;
wb_penalized_rate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
wb_penalized_rate = wb_penalized / writeback_count;
wb_fanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
wb_fanout = producer_inst / consumer_inst;
wb_rate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
wb_rate = writeback_count / cpu->numCycles;
}
template<class Impl>
@ -990,6 +1104,8 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
instQueue.advanceTail(inst);
exe_nop[tid]++;
add_to_iq = false;
} else if (inst->isExecuted()) {
assert(0 && "Instruction shouldn't be executed.\n");
@ -1124,11 +1240,11 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
++iewExecLoadInsts;
// ++iewExecLoadInsts;
} else if (inst->isStore()) {
ldstQueue.executeStore(inst);
++iewExecStoreInsts;
// ++iewExecStoreInsts;
// If the store had a fault then it may not have a mem req
if (inst->req && !(inst->req->flags & LOCKED)) {
@ -1146,13 +1262,13 @@ DefaultIEW<Impl>::executeInsts()
} else {
inst->execute();
++iewExecutedInsts;
inst->setExecuted();
instToCommit(inst);
}
updateExeInstStats(inst);
// Check if branch was correct. This check happens after the
// instruction is added to the queue because even if the branch
// is mispredicted, the branch instruction itself is still valid.
@ -1243,17 +1359,20 @@ DefaultIEW<Impl>::writebackInsts()
for (int inst_num = 0; inst_num < issueWidth &&
toCommit->insts[inst_num]; inst_num++) {
DynInstPtr inst = toCommit->insts[inst_num];
int tid = inst->threadNumber;
DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
inst->readPC());
iewInstsToCommit[tid]++;
// Some instructions will be sent to commit without having
// executed because they need commit to handle them.
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
if (!inst->isSquashed() && inst->isExecuted()) {
instQueue.wakeDependents(inst);
int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {
//mark as Ready
@ -1261,6 +1380,10 @@ DefaultIEW<Impl>::writebackInsts()
inst->renamedDestRegIdx(i));
scoreboard->setReg(inst->renamedDestRegIdx(i));
}
producer_inst[tid]++;
consumer_inst[tid]+= dependents;
writeback_count[tid]++;
}
}
}
@ -1390,3 +1513,39 @@ DefaultIEW<Impl>::tick()
cpu->activityThisCycle();
}
}
/**
 * Updates the execute-stage statistics for a single instruction.
 *
 * Bumps the total executed-instruction count (or, on Alpha targets, the
 * per-thread software-prefetch count when the instruction is a data
 * prefetch), and the per-thread branch, memory-reference and load counters
 * according to the instruction's type.
 *
 * @param inst The instruction whose execution is being recorded; its
 *             threadNumber selects the entry of each per-thread stat.
 */
template <class Impl>
void
DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
{
    int thread_number = inst->threadNumber;

    //
    //  Pick off the software prefetches
    //
    // iewExecutedInsts is a scalar (not per-thread) stat, so it must be
    // incremented directly in both configurations; indexing it by thread
    // does not match its declaration.
    // NOTE(review): confirm callers no longer increment iewExecutedInsts
    // themselves, otherwise executed instructions are counted twice.
#ifdef TARGET_ALPHA
    if (inst->isDataPrefetch())
        exe_swp[thread_number]++;
    else
        iewExecutedInsts++;
#else
    iewExecutedInsts++;
#endif

    //
    //  Control operations
    //
    if (inst->isControl())
        exe_branches[thread_number]++;

    //
    //  Memory operations
    //
    if (inst->isMemRef()) {
        exe_refs[thread_number]++;

        // Loads are counted as a subset of memory references.
        if (inst->isLoad()) {
            iewExecLoadInsts[thread_number]++;
        }
    }
}

View file

@ -185,7 +185,7 @@ class InstructionQueue
void commit(const InstSeqNum &inst, unsigned tid = 0);
/** Wakes all dependents of a completed instruction. */
void wakeDependents(DynInstPtr &completed_inst);
int wakeDependents(DynInstPtr &completed_inst);
/** Adds a ready memory instruction to the ready list. */
void addReadyMemInst(DynInstPtr &ready_inst);
@ -479,6 +479,7 @@ class InstructionQueue
/** Stat for number of non-speculative instructions added. */
Stats::Scalar<> iqNonSpecInstsAdded;
// Stats::Scalar<> iqIntInstsAdded;
Stats::Scalar<> iqInstsIssued;
/** Stat for number of integer instructions issued. */
Stats::Scalar<> iqIntInstsIssued;
// Stats::Scalar<> iqFloatInstsAdded;
@ -505,6 +506,20 @@ class InstructionQueue
*/
Stats::Scalar<> iqSquashedNonSpecRemoved;
Stats::VectorDistribution<> queue_res_dist;
Stats::Vector<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist;
Stats::Vector<> stat_fu_busy;
// Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
Stats::Formula issue_rate;
// Stats::Formula issue_stores;
// Stats::Formula issue_op_rate;
Stats::Vector<> fu_busy; //cumulative fu busy
Stats::Formula fu_busy_rate;
};
#endif //__CPU_O3_INST_QUEUE_HH__

View file

@ -224,6 +224,7 @@ template <class Impl>
void
InstructionQueue<Impl>::regStats()
{
using namespace Stats;
iqInstsAdded
.name(name() + ".iqInstsAdded")
.desc("Number of instructions added to the IQ (excludes non-spec)")
@ -236,6 +237,11 @@ InstructionQueue<Impl>::regStats()
// iqIntInstsAdded;
iqInstsIssued
.name(name() + ".iqInstsIssued")
.desc("Number of instructions issued")
.prereq(iqInstsIssued);
iqIntInstsIssued
.name(name() + ".iqIntInstsIssued")
.desc("Number of integer instructions issued")
@ -291,6 +297,103 @@ InstructionQueue<Impl>::regStats()
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
queue_res_dist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
queue_res_dist.subname(i, opClassStrings[i]);
}
n_issued_dist
.init(totalWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
/*
dist_unissued
.init(Num_OpClasses+2)
.name(name() + ".ISSUE:unissued_cause")
.desc("Reason ready instruction not issued")
.flags(pdf | dist)
;
for (int i=0; i < (Num_OpClasses + 2); ++i) {
dist_unissued.subname(i, unissued_names[i]);
}
*/
stat_issued_inst_type
.init(numThreads,Num_OpClasses)
.name(name() + ".ISSUE:FU_type")
.desc("Type of FU issued")
.flags(total | pdf | dist)
;
stat_issued_inst_type.ysubnames(opClassStrings);
//
// How long did instructions for a particular FU type wait prior to issue
//
issue_delay_dist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
for (int i=0; i<Num_OpClasses; ++i) {
stringstream subname;
subname << opClassStrings[i] << "_delay";
issue_delay_dist.subname(i, subname.str());
}
issue_rate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
.flags(total)
;
issue_rate = iqInstsIssued / cpu->numCycles;
/*
issue_stores
.name(name() + ".ISSUE:stores")
.desc("Number of stores issued")
.flags(total)
;
issue_stores = exe_refs - exe_loads;
*/
/*
issue_op_rate
.name(name() + ".ISSUE:op_rate")
.desc("Operation issue rate")
.flags(total)
;
issue_op_rate = issued_ops / numCycles;
*/
stat_fu_busy
.init(Num_OpClasses)
.name(name() + ".ISSUE:fu_full")
.desc("attempts to use FU when none available")
.flags(pdf | dist)
;
for (int i=0; i < Num_OpClasses; ++i) {
stat_fu_busy.subname(i, opClassStrings[i]);
}
fu_busy
.init(numThreads)
.name(name() + ".ISSUE:fu_busy_cnt")
.desc("FU busy when requested")
.flags(total)
;
fu_busy_rate
.name(name() + ".ISSUE:fu_busy_rate")
.desc("FU busy rate (busy events/executed inst)")
.flags(total)
;
fu_busy_rate = fu_busy / iqInstsIssued;
for ( int i=0; i < numThreads; i++) {
// Tell mem dependence unit to reg stats as well.
memDepUnit[i].regStats();
@ -658,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
int idx = fuPool->getUnit(op_class);
int tid = issuing_inst->threadNumber;
if (idx == -2) {
assert(op_class == No_OpClass);
@ -666,7 +771,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
" %#x [sn:%lli]\n",
issuing_inst->threadNumber, issuing_inst->readPC(),
tid, issuing_inst->readPC(),
issuing_inst->seqNum);
readyInsts[op_class].pop();
@ -685,14 +790,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// Memory instructions can not be freed from the IQ until they
// complete.
++freeEntries;
count[issuing_inst->threadNumber]--;
count[tid]--;
issuing_inst->removeInIQ();
} else {
memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
stat_issued_inst_type[tid][op_class]++;
} else if (idx != -1) {
int op_latency = fuPool->getOpLatency(op_class);
@ -722,7 +828,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
"[sn:%lli]\n",
issuing_inst->threadNumber, issuing_inst->readPC(),
tid, issuing_inst->readPC(),
issuing_inst->seqNum);
readyInsts[op_class].pop();
@ -741,14 +847,17 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// Memory instructions can not be freed from the IQ until they
// complete.
++freeEntries;
count[issuing_inst->threadNumber]--;
count[tid]--;
issuing_inst->removeInIQ();
} else {
memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
memDepUnit[tid].issue(issuing_inst);
}
listOrder.erase(order_it++);
stat_issued_inst_type[tid][op_class]++;
} else {
stat_fu_busy[op_class]++;
fu_busy[tid]++;
++order_it;
}
}
@ -808,9 +917,11 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
}
template <class Impl>
void
int
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
{
int dependents = 0;
DPRINTF(IQ, "Waking dependents of completed instruction.\n");
assert(!completed_inst->isSquashed());
@ -875,6 +986,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
curr = prev->next;
prev->inst = NULL;
++dependents;
delete prev;
}
@ -886,6 +999,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
// Mark the scoreboard as having that register ready.
regScoreboard[dest_reg] = true;
}
return dependents;
}
template <class Impl>

View file

@ -90,7 +90,7 @@ class DefaultRename
Squashing,
Blocked,
Unblocking,
BarrierStall
SerializeStall
};
private:
@ -359,8 +359,8 @@ class DefaultRename
/** Tracks which stages are telling decode to stall. */
Stalls stalls[Impl::MaxThreads];
/** The barrier instruction that rename has stalled on. */
DynInstPtr barrierInst[Impl::MaxThreads];
/** The serialize instruction that rename has stalled on. */
DynInstPtr serializeInst[Impl::MaxThreads];
/** Records if rename needs to serialize on the next instruction for any
* thread.
@ -419,8 +419,8 @@ class DefaultRename
Stats::Scalar<> renameIdleCycles;
/** Stat for total number of cycles spent blocking. */
Stats::Scalar<> renameBlockCycles;
/** Stat for total number of cycles spent stalling for a barrier. */
Stats::Scalar<> renameBarrierCycles;
/** Stat for total number of cycles spent stalling for a serializing inst. */
Stats::Scalar<> renameSerializeStallCycles;
/** Stat for total number of cycles spent running normally. */
Stats::Scalar<> renameRunCycles;
/** Stat for total number of cycles spent unblocking. */
@ -446,6 +446,8 @@ class DefaultRename
Stats::Scalar<> renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
Stats::Scalar<> renameUndoneMaps;
Stats::Scalar<> renamedSerializing;
Stats::Scalar<> renamedTempSerializing;
};
#endif // __CPU_O3_RENAME_HH__

View file

@ -53,7 +53,7 @@ DefaultRename<Impl>::DefaultRename(Params *params)
stalls[i].iew = false;
stalls[i].commit = false;
barrierInst[i] = NULL;
serializeInst[i] = NULL;
instsInProgress[i] = 0;
@ -78,69 +78,79 @@ void
DefaultRename<Impl>::regStats()
{
renameSquashCycles
.name(name() + ".renameSquashCycles")
.name(name() + ".RENAME:SquashCycles")
.desc("Number of cycles rename is squashing")
.prereq(renameSquashCycles);
renameIdleCycles
.name(name() + ".renameIdleCycles")
.name(name() + ".RENAME:IdleCycles")
.desc("Number of cycles rename is idle")
.prereq(renameIdleCycles);
renameBlockCycles
.name(name() + ".renameBlockCycles")
.name(name() + ".RENAME:BlockCycles")
.desc("Number of cycles rename is blocking")
.prereq(renameBlockCycles);
renameBarrierCycles
.name(name() + ".renameBarrierCycles")
.desc("Number of cycles rename is blocking due to a barrier stall")
.prereq(renameBarrierCycles);
renameSerializeStallCycles
.name(name() + ".RENAME:serializeStallCycles")
.desc("count of cycles rename stalled for serializing inst")
.flags(Stats::total);
renameRunCycles
.name(name() + ".renameRunCycles")
.name(name() + ".RENAME:RunCycles")
.desc("Number of cycles rename is running")
.prereq(renameIdleCycles);
renameUnblockCycles
.name(name() + ".renameUnblockCycles")
.name(name() + ".RENAME:UnblockCycles")
.desc("Number of cycles rename is unblocking")
.prereq(renameUnblockCycles);
renameRenamedInsts
.name(name() + ".renameRenamedInsts")
.name(name() + ".RENAME:RenamedInsts")
.desc("Number of instructions processed by rename")
.prereq(renameRenamedInsts);
renameSquashedInsts
.name(name() + ".renameSquashedInsts")
.name(name() + ".RENAME:SquashedInsts")
.desc("Number of squashed instructions processed by rename")
.prereq(renameSquashedInsts);
renameROBFullEvents
.name(name() + ".renameROBFullEvents")
.name(name() + ".RENAME:ROBFullEvents")
.desc("Number of times rename has blocked due to ROB full")
.prereq(renameROBFullEvents);
renameIQFullEvents
.name(name() + ".renameIQFullEvents")
.name(name() + ".RENAME:IQFullEvents")
.desc("Number of times rename has blocked due to IQ full")
.prereq(renameIQFullEvents);
renameLSQFullEvents
.name(name() + ".renameLSQFullEvents")
.name(name() + ".RENAME:LSQFullEvents")
.desc("Number of times rename has blocked due to LSQ full")
.prereq(renameLSQFullEvents);
renameFullRegistersEvents
.name(name() + ".renameFullRegisterEvents")
.name(name() + ".RENAME:FullRegisterEvents")
.desc("Number of times there has been no free registers")
.prereq(renameFullRegistersEvents);
renameRenamedOperands
.name(name() + ".renameRenamedOperands")
.name(name() + ".RENAME:RenamedOperands")
.desc("Number of destination operands rename has renamed")
.prereq(renameRenamedOperands);
renameRenameLookups
.name(name() + ".renameRenameLookups")
.name(name() + ".RENAME:RenameLookups")
.desc("Number of register rename lookups that rename has made")
.prereq(renameRenameLookups);
renameCommittedMaps
.name(name() + ".renameCommittedMaps")
.name(name() + ".RENAME:CommittedMaps")
.desc("Number of HB maps that are committed")
.prereq(renameCommittedMaps);
renameUndoneMaps
.name(name() + ".renameUndoneMaps")
.name(name() + ".RENAME:UndoneMaps")
.desc("Number of HB maps that are undone due to squashing")
.prereq(renameUndoneMaps);
renamedSerializing
.name(name() + ".RENAME:serializingInsts")
.desc("count of serializing insts renamed")
.flags(Stats::total)
;
renamedTempSerializing
.name(name() + ".RENAME:tempSerializingInsts")
.desc("count of temporary serializing insts renamed")
.flags(Stats::total)
;
}
template <class Impl>
@ -254,7 +264,7 @@ DefaultRename<Impl>::squash(unsigned tid)
// cycle and there should be space to hold everything due to the squash.
if (renameStatus[tid] == Blocked ||
renameStatus[tid] == Unblocking ||
renameStatus[tid] == BarrierStall) {
renameStatus[tid] == SerializeStall) {
#if !FULL_SYSTEM
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals to
@ -267,7 +277,7 @@ DefaultRename<Impl>::squash(unsigned tid)
#else
toDecode->renameUnblock[tid] = 1;
#endif
barrierInst[tid] = NULL;
serializeInst[tid] = NULL;
}
// Set the status to Squashing.
@ -370,8 +380,8 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
++renameBlockCycles;
} else if (renameStatus[tid] == Squashing) {
++renameSquashCycles;
} else if (renameStatus[tid] == BarrierStall) {
++renameBarrierCycles;
} else if (renameStatus[tid] == SerializeStall) {
++renameSerializeStallCycles;
}
if (renameStatus[tid] == Running ||
@ -535,14 +545,18 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
DPRINTF(Rename, "Serialize before instruction encountered.\n");
if (!inst->isTempSerializeBefore())
if (!inst->isTempSerializeBefore()) {
renamedSerializing++;
inst->setSerializeHandled();
} else {
renamedTempSerializing++;
}
// Change status over to BarrierStall so that other stages know
// Change status over to SerializeStall so that other stages know
// what this is blocked on.
renameStatus[tid] = BarrierStall;
renameStatus[tid] = SerializeStall;
barrierInst[tid] = inst;
serializeInst[tid] = inst;
blockThisCycle = true;
@ -716,9 +730,9 @@ DefaultRename<Impl>::block(unsigned tid)
wroteToTimeBuffer = true;
}
// Rename can not go from BarrierStall to Blocked, otherwise it would
// not know to complete the barrier stall.
if (renameStatus[tid] != BarrierStall) {
// Rename can not go from SerializeStall to Blocked, otherwise it would
// not know to complete the serialize stall.
if (renameStatus[tid] != SerializeStall) {
// Set status to Blocked.
renameStatus[tid] = Blocked;
return true;
@ -735,7 +749,7 @@ DefaultRename<Impl>::unblock(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
// Rename is done unblocking if the skid buffer is empty.
if (skidBuffer[tid].empty() && renameStatus[tid] != BarrierStall) {
if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
@ -1008,9 +1022,9 @@ DefaultRename<Impl>::checkStall(unsigned tid)
} else if (renameMap[tid]->numFreeEntries() <= 0) {
DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
ret_val = true;
} else if (renameStatus[tid] == BarrierStall &&
} else if (renameStatus[tid] == SerializeStall &&
(!emptyROB[tid] || instsInProgress[tid])) {
DPRINTF(Rename,"[tid:%i]: Stall: Barrier stall and ROB is not "
DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
"empty.\n",
tid);
ret_val = true;
@ -1064,7 +1078,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
// if so then go to unblocking
// If status was Squashing
// check if squashing is not high. Switch to running this cycle.
// If status was barrier stall
// If status was serialize stall
// check if ROB is empty and no insts are in flight to the ROB
readFreeEntries(tid);
@ -1113,12 +1127,12 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
return false;
}
if (renameStatus[tid] == BarrierStall) {
if (renameStatus[tid] == SerializeStall) {
// Stall ends once the ROB is free.
DPRINTF(Rename, "[tid:%u]: Done with barrier stall, switching to "
DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
"unblocking.\n", tid);
DynInstPtr barr_inst = barrierInst[tid];
DynInstPtr serial_inst = serializeInst[tid];
renameStatus[tid] = Unblocking;
@ -1126,21 +1140,21 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
"PC %#x.\n",
tid, barr_inst->seqNum, barr_inst->readPC());
tid, serial_inst->seqNum, serial_inst->readPC());
// Put instruction into queue here.
barr_inst->clearSerializeBefore();
serial_inst->clearSerializeBefore();
if (!skidBuffer[tid].empty()) {
skidBuffer[tid].push_front(barr_inst);
skidBuffer[tid].push_front(serial_inst);
} else {
insts[tid].push_front(barr_inst);
insts[tid].push_front(serial_inst);
}
DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
" Adding to front of list.", tid);
barrierInst[tid] = NULL;
serializeInst[tid] = NULL;
return true;
}