New stats added to O3 model.
--HG-- extra : convert_revision : 7abb491e89e3e1a331cd19aa05ddce5184abf9e0
This commit is contained in:
parent
b14bf03219
commit
676afbe2c7
10 changed files with 555 additions and 76 deletions
|
@ -369,6 +369,8 @@ class DefaultCommit
|
||||||
/** Rename map interface. */
|
/** Rename map interface. */
|
||||||
RenameMap *renameMap[Impl::MaxThreads];
|
RenameMap *renameMap[Impl::MaxThreads];
|
||||||
|
|
||||||
|
void updateComInstStats(DynInstPtr &inst);
|
||||||
|
|
||||||
/** Stat for the total number of committed instructions. */
|
/** Stat for the total number of committed instructions. */
|
||||||
Stats::Scalar<> commitCommittedInsts;
|
Stats::Scalar<> commitCommittedInsts;
|
||||||
/** Stat for the total number of squashed instructions discarded by commit.
|
/** Stat for the total number of squashed instructions discarded by commit.
|
||||||
|
@ -383,15 +385,26 @@ class DefaultCommit
|
||||||
*/
|
*/
|
||||||
Stats::Scalar<> commitNonSpecStalls;
|
Stats::Scalar<> commitNonSpecStalls;
|
||||||
/** Stat for the total number of committed branches. */
|
/** Stat for the total number of committed branches. */
|
||||||
Stats::Scalar<> commitCommittedBranches;
|
// Stats::Scalar<> commitCommittedBranches;
|
||||||
/** Stat for the total number of committed loads. */
|
/** Stat for the total number of committed loads. */
|
||||||
Stats::Scalar<> commitCommittedLoads;
|
// Stats::Scalar<> commitCommittedLoads;
|
||||||
/** Stat for the total number of committed memory references. */
|
/** Stat for the total number of committed memory references. */
|
||||||
Stats::Scalar<> commitCommittedMemRefs;
|
// Stats::Scalar<> commitCommittedMemRefs;
|
||||||
/** Stat for the total number of branch mispredicts that caused a squash. */
|
/** Stat for the total number of branch mispredicts that caused a squash. */
|
||||||
Stats::Scalar<> branchMispredicts;
|
Stats::Scalar<> branchMispredicts;
|
||||||
/** Distribution of the number of committed instructions each cycle. */
|
/** Distribution of the number of committed instructions each cycle. */
|
||||||
Stats::Distribution<> numCommittedDist;
|
Stats::Distribution<> numCommittedDist;
|
||||||
|
|
||||||
|
// total number of instructions committed
|
||||||
|
Stats::Vector<> stat_com_inst;
|
||||||
|
Stats::Vector<> stat_com_swp;
|
||||||
|
Stats::Vector<> stat_com_refs;
|
||||||
|
Stats::Vector<> stat_com_loads;
|
||||||
|
Stats::Vector<> stat_com_membars;
|
||||||
|
Stats::Vector<> stat_com_branches;
|
||||||
|
|
||||||
|
Stats::Scalar<> commit_eligible_samples;
|
||||||
|
Stats::Vector<> commit_eligible;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_O3_COMMIT_HH__
|
#endif // __CPU_O3_COMMIT_HH__
|
||||||
|
|
|
@ -133,6 +133,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultCommit<Impl>::regStats()
|
DefaultCommit<Impl>::regStats()
|
||||||
{
|
{
|
||||||
|
using namespace Stats;
|
||||||
commitCommittedInsts
|
commitCommittedInsts
|
||||||
.name(name() + ".commitCommittedInsts")
|
.name(name() + ".commitCommittedInsts")
|
||||||
.desc("The number of committed instructions")
|
.desc("The number of committed instructions")
|
||||||
|
@ -150,6 +151,7 @@ DefaultCommit<Impl>::regStats()
|
||||||
.desc("The number of times commit has been forced to stall to "
|
.desc("The number of times commit has been forced to stall to "
|
||||||
"communicate backwards")
|
"communicate backwards")
|
||||||
.prereq(commitNonSpecStalls);
|
.prereq(commitNonSpecStalls);
|
||||||
|
/*
|
||||||
commitCommittedBranches
|
commitCommittedBranches
|
||||||
.name(name() + ".commitCommittedBranches")
|
.name(name() + ".commitCommittedBranches")
|
||||||
.desc("The number of committed branches")
|
.desc("The number of committed branches")
|
||||||
|
@ -162,6 +164,7 @@ DefaultCommit<Impl>::regStats()
|
||||||
.name(name() + ".commitCommittedMemRefs")
|
.name(name() + ".commitCommittedMemRefs")
|
||||||
.desc("The number of committed memory references")
|
.desc("The number of committed memory references")
|
||||||
.prereq(commitCommittedMemRefs);
|
.prereq(commitCommittedMemRefs);
|
||||||
|
*/
|
||||||
branchMispredicts
|
branchMispredicts
|
||||||
.name(name() + ".branchMispredicts")
|
.name(name() + ".branchMispredicts")
|
||||||
.desc("The number of times a branch was mispredicted")
|
.desc("The number of times a branch was mispredicted")
|
||||||
|
@ -172,6 +175,73 @@ DefaultCommit<Impl>::regStats()
|
||||||
.desc("Number of insts commited each cycle")
|
.desc("Number of insts commited each cycle")
|
||||||
.flags(Stats::pdf)
|
.flags(Stats::pdf)
|
||||||
;
|
;
|
||||||
|
|
||||||
|
stat_com_inst
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:count")
|
||||||
|
.desc("Number of instructions committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
stat_com_swp
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:swp_count")
|
||||||
|
.desc("Number of s/w prefetches committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
stat_com_refs
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:refs")
|
||||||
|
.desc("Number of memory references committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
stat_com_loads
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:loads")
|
||||||
|
.desc("Number of loads committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
stat_com_membars
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:membars")
|
||||||
|
.desc("Number of memory barriers committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
stat_com_branches
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:branches")
|
||||||
|
.desc("Number of branches committed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Commit-Eligible instructions...
|
||||||
|
//
|
||||||
|
// -> The number of instructions eligible to commit in those
|
||||||
|
// cycles where we reached our commit BW limit (less the number
|
||||||
|
// actually committed)
|
||||||
|
//
|
||||||
|
// -> The average value is computed over ALL CYCLES... not just
|
||||||
|
// the BW limited cycles
|
||||||
|
//
|
||||||
|
// -> The standard deviation is computed only over cycles where
|
||||||
|
// we reached the BW limit
|
||||||
|
//
|
||||||
|
commit_eligible
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".COM:bw_limited")
|
||||||
|
.desc("number of insts not committed due to BW limits")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
commit_eligible_samples
|
||||||
|
.name(name() + ".COM:bw_lim_events")
|
||||||
|
.desc("number cycles where commit BW limit reached")
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -1060,9 +1130,7 @@ head_inst->isWriteBarrier())*/
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (head_inst->isControl()) {
|
updateComInstStats(head_inst);
|
||||||
++commitCommittedBranches;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now that the instruction is going to be committed, finalize its
|
// Now that the instruction is going to be committed, finalize its
|
||||||
// trace data.
|
// trace data.
|
||||||
|
@ -1186,6 +1254,47 @@ DefaultCommit<Impl>::robDoneSquashing()
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
|
||||||
|
{
|
||||||
|
unsigned thread = inst->threadNumber;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pick off the software prefetches
|
||||||
|
//
|
||||||
|
#ifdef TARGET_ALPHA
|
||||||
|
if (inst->isDataPrefetch()) {
|
||||||
|
stat_com_swp[thread]++;
|
||||||
|
} else {
|
||||||
|
stat_com_inst[thread]++;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
stat_com_inst[thread]++;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Control Instructions
|
||||||
|
//
|
||||||
|
if (inst->isControl())
|
||||||
|
stat_com_branches[thread]++;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Memory references
|
||||||
|
//
|
||||||
|
if (inst->isMemRef()) {
|
||||||
|
stat_com_refs[thread]++;
|
||||||
|
|
||||||
|
if (inst->isLoad()) {
|
||||||
|
stat_com_loads[thread]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->isMemBarrier()) {
|
||||||
|
stat_com_membars[thread]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
// //
|
// //
|
||||||
// SMT COMMIT POLICY MAINTAINED HERE //
|
// SMT COMMIT POLICY MAINTAINED HERE //
|
||||||
|
|
|
@ -370,6 +370,7 @@ class DefaultFetch
|
||||||
Stats::Scalar<> icacheStallCycles;
|
Stats::Scalar<> icacheStallCycles;
|
||||||
/** Stat for total number of fetched instructions. */
|
/** Stat for total number of fetched instructions. */
|
||||||
Stats::Scalar<> fetchedInsts;
|
Stats::Scalar<> fetchedInsts;
|
||||||
|
Stats::Scalar<> fetchedBranches;
|
||||||
/** Stat for total number of predicted branches. */
|
/** Stat for total number of predicted branches. */
|
||||||
Stats::Scalar<> predictedBranches;
|
Stats::Scalar<> predictedBranches;
|
||||||
/** Stat for total number of cycles spent fetching. */
|
/** Stat for total number of cycles spent fetching. */
|
||||||
|
@ -383,6 +384,8 @@ class DefaultFetch
|
||||||
Stats::Scalar<> fetchBlockedCycles;
|
Stats::Scalar<> fetchBlockedCycles;
|
||||||
/** Stat for total number of fetched cache lines. */
|
/** Stat for total number of fetched cache lines. */
|
||||||
Stats::Scalar<> fetchedCacheLines;
|
Stats::Scalar<> fetchedCacheLines;
|
||||||
|
|
||||||
|
Stats::Scalar<> fetchIcacheSquashes;
|
||||||
/** Distribution of number of instructions fetched each cycle. */
|
/** Distribution of number of instructions fetched each cycle. */
|
||||||
Stats::Distribution<> fetchNisnDist;
|
Stats::Distribution<> fetchNisnDist;
|
||||||
Stats::Formula idleRate;
|
Stats::Formula idleRate;
|
||||||
|
|
|
@ -178,6 +178,11 @@ DefaultFetch<Impl>::regStats()
|
||||||
.desc("Number of instructions fetch has processed")
|
.desc("Number of instructions fetch has processed")
|
||||||
.prereq(fetchedInsts);
|
.prereq(fetchedInsts);
|
||||||
|
|
||||||
|
fetchedBranches
|
||||||
|
.name(name() + ".fetchedBranches")
|
||||||
|
.desc("Number of branches that fetch encountered")
|
||||||
|
.prereq(fetchedBranches);
|
||||||
|
|
||||||
predictedBranches
|
predictedBranches
|
||||||
.name(name() + ".predictedBranches")
|
.name(name() + ".predictedBranches")
|
||||||
.desc("Number of branches that fetch has predicted taken")
|
.desc("Number of branches that fetch has predicted taken")
|
||||||
|
@ -209,6 +214,11 @@ DefaultFetch<Impl>::regStats()
|
||||||
.desc("Number of cache lines fetched")
|
.desc("Number of cache lines fetched")
|
||||||
.prereq(fetchedCacheLines);
|
.prereq(fetchedCacheLines);
|
||||||
|
|
||||||
|
fetchIcacheSquashes
|
||||||
|
.name(name() + ".fetchIcacheSquashes")
|
||||||
|
.desc("Number of outstanding Icache misses that were squashed")
|
||||||
|
.prereq(fetchIcacheSquashes);
|
||||||
|
|
||||||
fetchNisnDist
|
fetchNisnDist
|
||||||
.init(/* base value */ 0,
|
.init(/* base value */ 0,
|
||||||
/* last value */ fetchWidth,
|
/* last value */ fetchWidth,
|
||||||
|
@ -322,8 +332,10 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
|
||||||
// Can keep track of how many cache accesses go unused due to
|
// Can keep track of how many cache accesses go unused due to
|
||||||
// misspeculation here.
|
// misspeculation here.
|
||||||
if (fetchStatus[tid] != IcacheMissStall ||
|
if (fetchStatus[tid] != IcacheMissStall ||
|
||||||
req != memReq[tid])
|
req != memReq[tid]) {
|
||||||
|
++fetchIcacheSquashes;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Wake up the CPU (if it went to sleep and was waiting on this completion
|
// Wake up the CPU (if it went to sleep and was waiting on this completion
|
||||||
// event).
|
// event).
|
||||||
|
@ -400,6 +412,8 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
|
||||||
|
|
||||||
predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
|
predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
|
||||||
|
|
||||||
|
++fetchedBranches;
|
||||||
|
|
||||||
if (predict_taken) {
|
if (predict_taken) {
|
||||||
++predictedBranches;
|
++predictedBranches;
|
||||||
}
|
}
|
||||||
|
@ -457,6 +471,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||||
// If translation was successful, attempt to read the first
|
// If translation was successful, attempt to read the first
|
||||||
// instruction.
|
// instruction.
|
||||||
if (fault == NoFault) {
|
if (fault == NoFault) {
|
||||||
|
#if FULL_SYSTEM
|
||||||
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) {
|
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) {
|
||||||
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
|
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
|
||||||
"misspeculating path!",
|
"misspeculating path!",
|
||||||
|
@ -464,6 +479,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||||
ret_fault = TheISA::genMachineCheckFault();
|
ret_fault = TheISA::genMachineCheckFault();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
|
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
|
||||||
fault = cpu->mem->read(memReq[tid], cacheData[tid]);
|
fault = cpu->mem->read(memReq[tid], cacheData[tid]);
|
||||||
|
@ -480,6 +496,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||||
|
|
||||||
MemAccessResult result = icacheInterface->access(memReq[tid]);
|
MemAccessResult result = icacheInterface->access(memReq[tid]);
|
||||||
|
|
||||||
|
fetchedCacheLines++;
|
||||||
|
|
||||||
// If the cache missed, then schedule an event to wake
|
// If the cache missed, then schedule an event to wake
|
||||||
// up this stage once the cache miss completes.
|
// up this stage once the cache miss completes.
|
||||||
// @todo: Possibly allow for longer than 1 cycle cache hits.
|
// @todo: Possibly allow for longer than 1 cycle cache hits.
|
||||||
|
@ -499,8 +517,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||||
"read.\n", tid);
|
"read.\n", tid);
|
||||||
|
|
||||||
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
|
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
|
||||||
|
|
||||||
fetchedCacheLines++;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
|
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
|
||||||
|
@ -889,10 +905,14 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||||
if (!fetch_success)
|
if (!fetch_success)
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
if (fetchStatus[tid] == Blocked) {
|
if (fetchStatus[tid] == Idle) {
|
||||||
|
++fetchIdleCycles;
|
||||||
|
} else if (fetchStatus[tid] == Blocked) {
|
||||||
++fetchBlockedCycles;
|
++fetchBlockedCycles;
|
||||||
} else if (fetchStatus[tid] == Squashing) {
|
} else if (fetchStatus[tid] == Squashing) {
|
||||||
++fetchSquashCycles;
|
++fetchSquashCycles;
|
||||||
|
} else if (fetchStatus[tid] == IcacheMissStall) {
|
||||||
|
++icacheStallCycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Status is Idle, Squashing, Blocked, or IcacheMissStall, so
|
// Status is Idle, Squashing, Blocked, or IcacheMissStall, so
|
||||||
|
@ -904,6 +924,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||||
|
|
||||||
// If we had a stall due to an icache miss, then return.
|
// If we had a stall due to an icache miss, then return.
|
||||||
if (fetchStatus[tid] == IcacheMissStall) {
|
if (fetchStatus[tid] == IcacheMissStall) {
|
||||||
|
++icacheStallCycles;
|
||||||
status_change = true;
|
status_change = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -278,6 +278,8 @@ class DefaultIEW
|
||||||
void tick();
|
void tick();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void updateExeInstStats(DynInstPtr &inst);
|
||||||
|
|
||||||
/** Pointer to main time buffer used for backwards communication. */
|
/** Pointer to main time buffer used for backwards communication. */
|
||||||
TimeBuffer<TimeStruct> *timeBuffer;
|
TimeBuffer<TimeStruct> *timeBuffer;
|
||||||
|
|
||||||
|
@ -443,9 +445,9 @@ class DefaultIEW
|
||||||
/** Stat for total number of executed instructions. */
|
/** Stat for total number of executed instructions. */
|
||||||
Stats::Scalar<> iewExecutedInsts;
|
Stats::Scalar<> iewExecutedInsts;
|
||||||
/** Stat for total number of executed load instructions. */
|
/** Stat for total number of executed load instructions. */
|
||||||
Stats::Scalar<> iewExecLoadInsts;
|
Stats::Vector<> iewExecLoadInsts;
|
||||||
/** Stat for total number of executed store instructions. */
|
/** Stat for total number of executed store instructions. */
|
||||||
Stats::Scalar<> iewExecStoreInsts;
|
// Stats::Scalar<> iewExecStoreInsts;
|
||||||
/** Stat for total number of squashed instructions skipped at execute. */
|
/** Stat for total number of squashed instructions skipped at execute. */
|
||||||
Stats::Scalar<> iewExecSquashedInsts;
|
Stats::Scalar<> iewExecSquashedInsts;
|
||||||
/** Stat for total number of memory ordering violation events. */
|
/** Stat for total number of memory ordering violation events. */
|
||||||
|
@ -456,6 +458,33 @@ class DefaultIEW
|
||||||
Stats::Scalar<> predictedNotTakenIncorrect;
|
Stats::Scalar<> predictedNotTakenIncorrect;
|
||||||
/** Stat for total number of mispredicted branches detected at execute. */
|
/** Stat for total number of mispredicted branches detected at execute. */
|
||||||
Stats::Formula branchMispredicts;
|
Stats::Formula branchMispredicts;
|
||||||
|
|
||||||
|
Stats::Vector<> exe_swp;
|
||||||
|
Stats::Vector<> exe_nop;
|
||||||
|
Stats::Vector<> exe_refs;
|
||||||
|
Stats::Vector<> exe_branches;
|
||||||
|
|
||||||
|
// Stats::Vector<> issued_ops;
|
||||||
|
/*
|
||||||
|
Stats::Vector<> stat_fu_busy;
|
||||||
|
Stats::Vector2d<> stat_fuBusy;
|
||||||
|
Stats::Vector<> dist_unissued;
|
||||||
|
Stats::Vector2d<> stat_issued_inst_type;
|
||||||
|
*/
|
||||||
|
Stats::Formula issue_rate;
|
||||||
|
Stats::Formula iewExecStoreInsts;
|
||||||
|
// Stats::Formula issue_op_rate;
|
||||||
|
// Stats::Formula fu_busy_rate;
|
||||||
|
|
||||||
|
Stats::Vector<> iewInstsToCommit;
|
||||||
|
Stats::Vector<> writeback_count;
|
||||||
|
Stats::Vector<> producer_inst;
|
||||||
|
Stats::Vector<> consumer_inst;
|
||||||
|
Stats::Vector<> wb_penalized;
|
||||||
|
|
||||||
|
Stats::Formula wb_rate;
|
||||||
|
Stats::Formula wb_fanout;
|
||||||
|
Stats::Formula wb_penalized_rate;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_O3_IEW_HH__
|
#endif // __CPU_O3_IEW_HH__
|
||||||
|
|
|
@ -140,6 +140,8 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::regStats()
|
DefaultIEW<Impl>::regStats()
|
||||||
{
|
{
|
||||||
|
using namespace Stats;
|
||||||
|
|
||||||
instQueue.regStats();
|
instQueue.regStats();
|
||||||
|
|
||||||
//ldstQueue.regStats();
|
//ldstQueue.regStats();
|
||||||
|
@ -195,13 +197,15 @@ DefaultIEW<Impl>::regStats()
|
||||||
.desc("Number of executed instructions");
|
.desc("Number of executed instructions");
|
||||||
|
|
||||||
iewExecLoadInsts
|
iewExecLoadInsts
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
.name(name() + ".iewExecLoadInsts")
|
.name(name() + ".iewExecLoadInsts")
|
||||||
.desc("Number of load instructions executed");
|
.desc("Number of load instructions executed")
|
||||||
|
.flags(total);
|
||||||
|
/*
|
||||||
iewExecStoreInsts
|
iewExecStoreInsts
|
||||||
.name(name() + ".iewExecStoreInsts")
|
.name(name() + ".iewExecStoreInsts")
|
||||||
.desc("Number of store instructions executed");
|
.desc("Number of store instructions executed");
|
||||||
|
*/
|
||||||
iewExecSquashedInsts
|
iewExecSquashedInsts
|
||||||
.name(name() + ".iewExecSquashedInsts")
|
.name(name() + ".iewExecSquashedInsts")
|
||||||
.desc("Number of squashed instructions skipped in execute");
|
.desc("Number of squashed instructions skipped in execute");
|
||||||
|
@ -223,6 +227,116 @@ DefaultIEW<Impl>::regStats()
|
||||||
.desc("Number of branch mispredicts detected at execute");
|
.desc("Number of branch mispredicts detected at execute");
|
||||||
|
|
||||||
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
|
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
|
||||||
|
|
||||||
|
exe_swp
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".EXEC:swp")
|
||||||
|
.desc("number of swp insts executed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
exe_nop
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".EXEC:nop")
|
||||||
|
.desc("number of nop insts executed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
exe_refs
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".EXEC:refs")
|
||||||
|
.desc("number of memory reference insts executed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
exe_branches
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".EXEC:branches")
|
||||||
|
.desc("Number of branches executed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
issue_rate
|
||||||
|
.name(name() + ".EXEC:rate")
|
||||||
|
.desc("Inst execution rate")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
issue_rate = iewExecutedInsts / cpu->numCycles;
|
||||||
|
|
||||||
|
iewExecStoreInsts
|
||||||
|
.name(name() + ".EXEC:stores")
|
||||||
|
.desc("Number of stores executed")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
iewExecStoreInsts = exe_refs - iewExecLoadInsts;
|
||||||
|
/*
|
||||||
|
for (int i=0; i<Num_OpClasses; ++i) {
|
||||||
|
stringstream subname;
|
||||||
|
subname << opClassStrings[i] << "_delay";
|
||||||
|
issue_delay_dist.subname(i, subname.str());
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
//
|
||||||
|
// Other stats
|
||||||
|
//
|
||||||
|
|
||||||
|
iewInstsToCommit
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".WB:sent")
|
||||||
|
.desc("cumulative count of insts sent to commit")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
writeback_count
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".WB:count")
|
||||||
|
.desc("cumulative count of insts written-back")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
producer_inst
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".WB:producers")
|
||||||
|
.desc("num instructions producing a value")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
consumer_inst
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".WB:consumers")
|
||||||
|
.desc("num instructions consuming a value")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
wb_penalized
|
||||||
|
.init(cpu->number_of_threads)
|
||||||
|
.name(name() + ".WB:penalized")
|
||||||
|
.desc("number of instrctions required to write to 'other' IQ")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
wb_penalized_rate
|
||||||
|
.name(name() + ".WB:penalized_rate")
|
||||||
|
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
wb_penalized_rate = wb_penalized / writeback_count;
|
||||||
|
|
||||||
|
wb_fanout
|
||||||
|
.name(name() + ".WB:fanout")
|
||||||
|
.desc("average fanout of values written-back")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
wb_fanout = producer_inst / consumer_inst;
|
||||||
|
|
||||||
|
wb_rate
|
||||||
|
.name(name() + ".WB:rate")
|
||||||
|
.desc("insts written-back per cycle")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
wb_rate = writeback_count / cpu->numCycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
@ -990,6 +1104,8 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
|
||||||
|
|
||||||
instQueue.advanceTail(inst);
|
instQueue.advanceTail(inst);
|
||||||
|
|
||||||
|
exe_nop[tid]++;
|
||||||
|
|
||||||
add_to_iq = false;
|
add_to_iq = false;
|
||||||
} else if (inst->isExecuted()) {
|
} else if (inst->isExecuted()) {
|
||||||
assert(0 && "Instruction shouldn't be executed.\n");
|
assert(0 && "Instruction shouldn't be executed.\n");
|
||||||
|
@ -1124,11 +1240,11 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
// event adds the instruction to the queue to commit
|
// event adds the instruction to the queue to commit
|
||||||
fault = ldstQueue.executeLoad(inst);
|
fault = ldstQueue.executeLoad(inst);
|
||||||
|
|
||||||
++iewExecLoadInsts;
|
// ++iewExecLoadInsts;
|
||||||
} else if (inst->isStore()) {
|
} else if (inst->isStore()) {
|
||||||
ldstQueue.executeStore(inst);
|
ldstQueue.executeStore(inst);
|
||||||
|
|
||||||
++iewExecStoreInsts;
|
// ++iewExecStoreInsts;
|
||||||
|
|
||||||
// If the store had a fault then it may not have a mem req
|
// If the store had a fault then it may not have a mem req
|
||||||
if (inst->req && !(inst->req->flags & LOCKED)) {
|
if (inst->req && !(inst->req->flags & LOCKED)) {
|
||||||
|
@ -1146,13 +1262,13 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
} else {
|
} else {
|
||||||
inst->execute();
|
inst->execute();
|
||||||
|
|
||||||
++iewExecutedInsts;
|
|
||||||
|
|
||||||
inst->setExecuted();
|
inst->setExecuted();
|
||||||
|
|
||||||
instToCommit(inst);
|
instToCommit(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
updateExeInstStats(inst);
|
||||||
|
|
||||||
// Check if branch was correct. This check happens after the
|
// Check if branch was correct. This check happens after the
|
||||||
// instruction is added to the queue because even if the branch
|
// instruction is added to the queue because even if the branch
|
||||||
// is mispredicted, the branch instruction itself is still valid.
|
// is mispredicted, the branch instruction itself is still valid.
|
||||||
|
@ -1243,17 +1359,20 @@ DefaultIEW<Impl>::writebackInsts()
|
||||||
for (int inst_num = 0; inst_num < issueWidth &&
|
for (int inst_num = 0; inst_num < issueWidth &&
|
||||||
toCommit->insts[inst_num]; inst_num++) {
|
toCommit->insts[inst_num]; inst_num++) {
|
||||||
DynInstPtr inst = toCommit->insts[inst_num];
|
DynInstPtr inst = toCommit->insts[inst_num];
|
||||||
|
int tid = inst->threadNumber;
|
||||||
|
|
||||||
DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
|
DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
|
||||||
inst->readPC());
|
inst->readPC());
|
||||||
|
|
||||||
|
iewInstsToCommit[tid]++;
|
||||||
|
|
||||||
// Some instructions will be sent to commit without having
|
// Some instructions will be sent to commit without having
|
||||||
// executed because they need commit to handle them.
|
// executed because they need commit to handle them.
|
||||||
// E.g. Uncached loads have not actually executed when they
|
// E.g. Uncached loads have not actually executed when they
|
||||||
// are first sent to commit. Instead commit must tell the LSQ
|
// are first sent to commit. Instead commit must tell the LSQ
|
||||||
// when it's ready to execute the uncached load.
|
// when it's ready to execute the uncached load.
|
||||||
if (!inst->isSquashed() && inst->isExecuted()) {
|
if (!inst->isSquashed() && inst->isExecuted()) {
|
||||||
instQueue.wakeDependents(inst);
|
int dependents = instQueue.wakeDependents(inst);
|
||||||
|
|
||||||
for (int i = 0; i < inst->numDestRegs(); i++) {
|
for (int i = 0; i < inst->numDestRegs(); i++) {
|
||||||
//mark as Ready
|
//mark as Ready
|
||||||
|
@ -1261,6 +1380,10 @@ DefaultIEW<Impl>::writebackInsts()
|
||||||
inst->renamedDestRegIdx(i));
|
inst->renamedDestRegIdx(i));
|
||||||
scoreboard->setReg(inst->renamedDestRegIdx(i));
|
scoreboard->setReg(inst->renamedDestRegIdx(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
producer_inst[tid]++;
|
||||||
|
consumer_inst[tid]+= dependents;
|
||||||
|
writeback_count[tid]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1390,3 +1513,39 @@ DefaultIEW<Impl>::tick()
|
||||||
cpu->activityThisCycle();
|
cpu->activityThisCycle();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
|
||||||
|
{
|
||||||
|
int thread_number = inst->threadNumber;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pick off the software prefetches
|
||||||
|
//
|
||||||
|
#ifdef TARGET_ALPHA
|
||||||
|
if (inst->isDataPrefetch())
|
||||||
|
exe_swp[thread_number]++;
|
||||||
|
else
|
||||||
|
iewExecutedInsts++;
|
||||||
|
#else
|
||||||
|
iewExecutedInsts[thread_number]++;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Control operations
|
||||||
|
//
|
||||||
|
if (inst->isControl())
|
||||||
|
exe_branches[thread_number]++;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Memory operations
|
||||||
|
//
|
||||||
|
if (inst->isMemRef()) {
|
||||||
|
exe_refs[thread_number]++;
|
||||||
|
|
||||||
|
if (inst->isLoad()) {
|
||||||
|
iewExecLoadInsts[thread_number]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -185,7 +185,7 @@ class InstructionQueue
|
||||||
void commit(const InstSeqNum &inst, unsigned tid = 0);
|
void commit(const InstSeqNum &inst, unsigned tid = 0);
|
||||||
|
|
||||||
/** Wakes all dependents of a completed instruction. */
|
/** Wakes all dependents of a completed instruction. */
|
||||||
void wakeDependents(DynInstPtr &completed_inst);
|
int wakeDependents(DynInstPtr &completed_inst);
|
||||||
|
|
||||||
/** Adds a ready memory instruction to the ready list. */
|
/** Adds a ready memory instruction to the ready list. */
|
||||||
void addReadyMemInst(DynInstPtr &ready_inst);
|
void addReadyMemInst(DynInstPtr &ready_inst);
|
||||||
|
@ -479,6 +479,7 @@ class InstructionQueue
|
||||||
/** Stat for number of non-speculative instructions added. */
|
/** Stat for number of non-speculative instructions added. */
|
||||||
Stats::Scalar<> iqNonSpecInstsAdded;
|
Stats::Scalar<> iqNonSpecInstsAdded;
|
||||||
// Stats::Scalar<> iqIntInstsAdded;
|
// Stats::Scalar<> iqIntInstsAdded;
|
||||||
|
Stats::Scalar<> iqInstsIssued;
|
||||||
/** Stat for number of integer instructions issued. */
|
/** Stat for number of integer instructions issued. */
|
||||||
Stats::Scalar<> iqIntInstsIssued;
|
Stats::Scalar<> iqIntInstsIssued;
|
||||||
// Stats::Scalar<> iqFloatInstsAdded;
|
// Stats::Scalar<> iqFloatInstsAdded;
|
||||||
|
@ -505,6 +506,20 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
||||||
|
|
||||||
|
Stats::VectorDistribution<> queue_res_dist;
|
||||||
|
Stats::Vector<> n_issued_dist;
|
||||||
|
Stats::VectorDistribution<> issue_delay_dist;
|
||||||
|
|
||||||
|
Stats::Vector<> stat_fu_busy;
|
||||||
|
// Stats::Vector<> dist_unissued;
|
||||||
|
Stats::Vector2d<> stat_issued_inst_type;
|
||||||
|
|
||||||
|
Stats::Formula issue_rate;
|
||||||
|
// Stats::Formula issue_stores;
|
||||||
|
// Stats::Formula issue_op_rate;
|
||||||
|
Stats::Vector<> fu_busy; //cumulative fu busy
|
||||||
|
|
||||||
|
Stats::Formula fu_busy_rate;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__CPU_O3_INST_QUEUE_HH__
|
#endif //__CPU_O3_INST_QUEUE_HH__
|
||||||
|
|
|
@ -224,6 +224,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
InstructionQueue<Impl>::regStats()
|
InstructionQueue<Impl>::regStats()
|
||||||
{
|
{
|
||||||
|
using namespace Stats;
|
||||||
iqInstsAdded
|
iqInstsAdded
|
||||||
.name(name() + ".iqInstsAdded")
|
.name(name() + ".iqInstsAdded")
|
||||||
.desc("Number of instructions added to the IQ (excludes non-spec)")
|
.desc("Number of instructions added to the IQ (excludes non-spec)")
|
||||||
|
@ -236,6 +237,11 @@ InstructionQueue<Impl>::regStats()
|
||||||
|
|
||||||
// iqIntInstsAdded;
|
// iqIntInstsAdded;
|
||||||
|
|
||||||
|
iqInstsIssued
|
||||||
|
.name(name() + ".iqInstsIssued")
|
||||||
|
.desc("Number of instructions issued")
|
||||||
|
.prereq(iqInstsIssued);
|
||||||
|
|
||||||
iqIntInstsIssued
|
iqIntInstsIssued
|
||||||
.name(name() + ".iqIntInstsIssued")
|
.name(name() + ".iqIntInstsIssued")
|
||||||
.desc("Number of integer instructions issued")
|
.desc("Number of integer instructions issued")
|
||||||
|
@ -291,6 +297,103 @@ InstructionQueue<Impl>::regStats()
|
||||||
.desc("Number of squashed non-spec instructions that were removed")
|
.desc("Number of squashed non-spec instructions that were removed")
|
||||||
.prereq(iqSquashedNonSpecRemoved);
|
.prereq(iqSquashedNonSpecRemoved);
|
||||||
|
|
||||||
|
queue_res_dist
|
||||||
|
.init(Num_OpClasses, 0, 99, 2)
|
||||||
|
.name(name() + ".IQ:residence:")
|
||||||
|
.desc("cycles from dispatch to issue")
|
||||||
|
.flags(total | pdf | cdf )
|
||||||
|
;
|
||||||
|
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||||
|
queue_res_dist.subname(i, opClassStrings[i]);
|
||||||
|
}
|
||||||
|
n_issued_dist
|
||||||
|
.init(totalWidth + 1)
|
||||||
|
.name(name() + ".ISSUE:issued_per_cycle")
|
||||||
|
.desc("Number of insts issued each cycle")
|
||||||
|
.flags(total | pdf | dist)
|
||||||
|
;
|
||||||
|
/*
|
||||||
|
dist_unissued
|
||||||
|
.init(Num_OpClasses+2)
|
||||||
|
.name(name() + ".ISSUE:unissued_cause")
|
||||||
|
.desc("Reason ready instruction not issued")
|
||||||
|
.flags(pdf | dist)
|
||||||
|
;
|
||||||
|
for (int i=0; i < (Num_OpClasses + 2); ++i) {
|
||||||
|
dist_unissued.subname(i, unissued_names[i]);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
stat_issued_inst_type
|
||||||
|
.init(numThreads,Num_OpClasses)
|
||||||
|
.name(name() + ".ISSUE:FU_type")
|
||||||
|
.desc("Type of FU issued")
|
||||||
|
.flags(total | pdf | dist)
|
||||||
|
;
|
||||||
|
stat_issued_inst_type.ysubnames(opClassStrings);
|
||||||
|
|
||||||
|
//
|
||||||
|
// How long did instructions for a particular FU type wait prior to issue
|
||||||
|
//
|
||||||
|
|
||||||
|
issue_delay_dist
|
||||||
|
.init(Num_OpClasses,0,99,2)
|
||||||
|
.name(name() + ".ISSUE:")
|
||||||
|
.desc("cycles from operands ready to issue")
|
||||||
|
.flags(pdf | cdf)
|
||||||
|
;
|
||||||
|
|
||||||
|
for (int i=0; i<Num_OpClasses; ++i) {
|
||||||
|
stringstream subname;
|
||||||
|
subname << opClassStrings[i] << "_delay";
|
||||||
|
issue_delay_dist.subname(i, subname.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
issue_rate
|
||||||
|
.name(name() + ".ISSUE:rate")
|
||||||
|
.desc("Inst issue rate")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
issue_rate = iqInstsIssued / cpu->numCycles;
|
||||||
|
/*
|
||||||
|
issue_stores
|
||||||
|
.name(name() + ".ISSUE:stores")
|
||||||
|
.desc("Number of stores issued")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
issue_stores = exe_refs - exe_loads;
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
issue_op_rate
|
||||||
|
.name(name() + ".ISSUE:op_rate")
|
||||||
|
.desc("Operation issue rate")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
issue_op_rate = issued_ops / numCycles;
|
||||||
|
*/
|
||||||
|
stat_fu_busy
|
||||||
|
.init(Num_OpClasses)
|
||||||
|
.name(name() + ".ISSUE:fu_full")
|
||||||
|
.desc("attempts to use FU when none available")
|
||||||
|
.flags(pdf | dist)
|
||||||
|
;
|
||||||
|
for (int i=0; i < Num_OpClasses; ++i) {
|
||||||
|
stat_fu_busy.subname(i, opClassStrings[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
fu_busy
|
||||||
|
.init(numThreads)
|
||||||
|
.name(name() + ".ISSUE:fu_busy_cnt")
|
||||||
|
.desc("FU busy when requested")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
|
||||||
|
fu_busy_rate
|
||||||
|
.name(name() + ".ISSUE:fu_busy_rate")
|
||||||
|
.desc("FU busy rate (busy events/executed inst)")
|
||||||
|
.flags(total)
|
||||||
|
;
|
||||||
|
fu_busy_rate = fu_busy / iqInstsIssued;
|
||||||
|
|
||||||
for ( int i=0; i < numThreads; i++) {
|
for ( int i=0; i < numThreads; i++) {
|
||||||
// Tell mem dependence unit to reg stats as well.
|
// Tell mem dependence unit to reg stats as well.
|
||||||
memDepUnit[i].regStats();
|
memDepUnit[i].regStats();
|
||||||
|
@ -658,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
int idx = fuPool->getUnit(op_class);
|
int idx = fuPool->getUnit(op_class);
|
||||||
|
|
||||||
|
int tid = issuing_inst->threadNumber;
|
||||||
|
|
||||||
if (idx == -2) {
|
if (idx == -2) {
|
||||||
assert(op_class == No_OpClass);
|
assert(op_class == No_OpClass);
|
||||||
|
|
||||||
|
@ -666,7 +771,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
|
DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
|
||||||
" %#x [sn:%lli]\n",
|
" %#x [sn:%lli]\n",
|
||||||
issuing_inst->threadNumber, issuing_inst->readPC(),
|
tid, issuing_inst->readPC(),
|
||||||
issuing_inst->seqNum);
|
issuing_inst->seqNum);
|
||||||
|
|
||||||
readyInsts[op_class].pop();
|
readyInsts[op_class].pop();
|
||||||
|
@ -685,14 +790,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
// Memory instructions can not be freed from the IQ until they
|
// Memory instructions can not be freed from the IQ until they
|
||||||
// complete.
|
// complete.
|
||||||
++freeEntries;
|
++freeEntries;
|
||||||
count[issuing_inst->threadNumber]--;
|
count[tid]--;
|
||||||
issuing_inst->removeInIQ();
|
issuing_inst->removeInIQ();
|
||||||
} else {
|
} else {
|
||||||
memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
|
memDepUnit[tid].issue(issuing_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
listOrder.erase(order_it++);
|
listOrder.erase(order_it++);
|
||||||
|
|
||||||
|
stat_issued_inst_type[tid][op_class]++;
|
||||||
} else if (idx != -1) {
|
} else if (idx != -1) {
|
||||||
int op_latency = fuPool->getOpLatency(op_class);
|
int op_latency = fuPool->getOpLatency(op_class);
|
||||||
|
|
||||||
|
@ -722,7 +828,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
|
DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
|
||||||
"[sn:%lli]\n",
|
"[sn:%lli]\n",
|
||||||
issuing_inst->threadNumber, issuing_inst->readPC(),
|
tid, issuing_inst->readPC(),
|
||||||
issuing_inst->seqNum);
|
issuing_inst->seqNum);
|
||||||
|
|
||||||
readyInsts[op_class].pop();
|
readyInsts[op_class].pop();
|
||||||
|
@ -741,14 +847,17 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
// Memory instructions can not be freed from the IQ until they
|
// Memory instructions can not be freed from the IQ until they
|
||||||
// complete.
|
// complete.
|
||||||
++freeEntries;
|
++freeEntries;
|
||||||
count[issuing_inst->threadNumber]--;
|
count[tid]--;
|
||||||
issuing_inst->removeInIQ();
|
issuing_inst->removeInIQ();
|
||||||
} else {
|
} else {
|
||||||
memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
|
memDepUnit[tid].issue(issuing_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
listOrder.erase(order_it++);
|
listOrder.erase(order_it++);
|
||||||
|
stat_issued_inst_type[tid][op_class]++;
|
||||||
} else {
|
} else {
|
||||||
|
stat_fu_busy[op_class]++;
|
||||||
|
fu_busy[tid]++;
|
||||||
++order_it;
|
++order_it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -808,9 +917,11 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
int
|
||||||
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
|
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
|
||||||
{
|
{
|
||||||
|
int dependents = 0;
|
||||||
|
|
||||||
DPRINTF(IQ, "Waking dependents of completed instruction.\n");
|
DPRINTF(IQ, "Waking dependents of completed instruction.\n");
|
||||||
|
|
||||||
assert(!completed_inst->isSquashed());
|
assert(!completed_inst->isSquashed());
|
||||||
|
@ -875,6 +986,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
|
||||||
curr = prev->next;
|
curr = prev->next;
|
||||||
prev->inst = NULL;
|
prev->inst = NULL;
|
||||||
|
|
||||||
|
++dependents;
|
||||||
|
|
||||||
delete prev;
|
delete prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -886,6 +999,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
|
||||||
// Mark the scoreboard as having that register ready.
|
// Mark the scoreboard as having that register ready.
|
||||||
regScoreboard[dest_reg] = true;
|
regScoreboard[dest_reg] = true;
|
||||||
}
|
}
|
||||||
|
return dependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
|
|
@ -90,7 +90,7 @@ class DefaultRename
|
||||||
Squashing,
|
Squashing,
|
||||||
Blocked,
|
Blocked,
|
||||||
Unblocking,
|
Unblocking,
|
||||||
BarrierStall
|
SerializeStall
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -359,8 +359,8 @@ class DefaultRename
|
||||||
/** Tracks which stages are telling decode to stall. */
|
/** Tracks which stages are telling decode to stall. */
|
||||||
Stalls stalls[Impl::MaxThreads];
|
Stalls stalls[Impl::MaxThreads];
|
||||||
|
|
||||||
/** The barrier instruction that rename has stalled on. */
|
/** The serialize instruction that rename has stalled on. */
|
||||||
DynInstPtr barrierInst[Impl::MaxThreads];
|
DynInstPtr serializeInst[Impl::MaxThreads];
|
||||||
|
|
||||||
/** Records if rename needs to serialize on the next instruction for any
|
/** Records if rename needs to serialize on the next instruction for any
|
||||||
* thread.
|
* thread.
|
||||||
|
@ -419,8 +419,8 @@ class DefaultRename
|
||||||
Stats::Scalar<> renameIdleCycles;
|
Stats::Scalar<> renameIdleCycles;
|
||||||
/** Stat for total number of cycles spent blocking. */
|
/** Stat for total number of cycles spent blocking. */
|
||||||
Stats::Scalar<> renameBlockCycles;
|
Stats::Scalar<> renameBlockCycles;
|
||||||
/** Stat for total number of cycles spent stalling for a barrier. */
|
/** Stat for total number of cycles spent stalling for a serializing inst. */
|
||||||
Stats::Scalar<> renameBarrierCycles;
|
Stats::Scalar<> renameSerializeStallCycles;
|
||||||
/** Stat for total number of cycles spent running normally. */
|
/** Stat for total number of cycles spent running normally. */
|
||||||
Stats::Scalar<> renameRunCycles;
|
Stats::Scalar<> renameRunCycles;
|
||||||
/** Stat for total number of cycles spent unblocking. */
|
/** Stat for total number of cycles spent unblocking. */
|
||||||
|
@ -446,6 +446,8 @@ class DefaultRename
|
||||||
Stats::Scalar<> renameCommittedMaps;
|
Stats::Scalar<> renameCommittedMaps;
|
||||||
/** Stat for total number of mappings that were undone due to a squash. */
|
/** Stat for total number of mappings that were undone due to a squash. */
|
||||||
Stats::Scalar<> renameUndoneMaps;
|
Stats::Scalar<> renameUndoneMaps;
|
||||||
|
Stats::Scalar<> renamedSerializing;
|
||||||
|
Stats::Scalar<> renamedTempSerializing;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_O3_RENAME_HH__
|
#endif // __CPU_O3_RENAME_HH__
|
||||||
|
|
|
@ -53,7 +53,7 @@ DefaultRename<Impl>::DefaultRename(Params *params)
|
||||||
|
|
||||||
stalls[i].iew = false;
|
stalls[i].iew = false;
|
||||||
stalls[i].commit = false;
|
stalls[i].commit = false;
|
||||||
barrierInst[i] = NULL;
|
serializeInst[i] = NULL;
|
||||||
|
|
||||||
instsInProgress[i] = 0;
|
instsInProgress[i] = 0;
|
||||||
|
|
||||||
|
@ -78,69 +78,79 @@ void
|
||||||
DefaultRename<Impl>::regStats()
|
DefaultRename<Impl>::regStats()
|
||||||
{
|
{
|
||||||
renameSquashCycles
|
renameSquashCycles
|
||||||
.name(name() + ".renameSquashCycles")
|
.name(name() + ".RENAME:SquashCycles")
|
||||||
.desc("Number of cycles rename is squashing")
|
.desc("Number of cycles rename is squashing")
|
||||||
.prereq(renameSquashCycles);
|
.prereq(renameSquashCycles);
|
||||||
renameIdleCycles
|
renameIdleCycles
|
||||||
.name(name() + ".renameIdleCycles")
|
.name(name() + ".RENAME:IdleCycles")
|
||||||
.desc("Number of cycles rename is idle")
|
.desc("Number of cycles rename is idle")
|
||||||
.prereq(renameIdleCycles);
|
.prereq(renameIdleCycles);
|
||||||
renameBlockCycles
|
renameBlockCycles
|
||||||
.name(name() + ".renameBlockCycles")
|
.name(name() + ".RENAME:BlockCycles")
|
||||||
.desc("Number of cycles rename is blocking")
|
.desc("Number of cycles rename is blocking")
|
||||||
.prereq(renameBlockCycles);
|
.prereq(renameBlockCycles);
|
||||||
renameBarrierCycles
|
renameSerializeStallCycles
|
||||||
.name(name() + ".renameBarrierCycles")
|
.name(name() + ".RENAME:serializeStallCycles")
|
||||||
.desc("Number of cycles rename is blocking due to a barrier stall")
|
.desc("count of cycles rename stalled for serializing inst")
|
||||||
.prereq(renameBarrierCycles);
|
.flags(Stats::total);
|
||||||
renameRunCycles
|
renameRunCycles
|
||||||
.name(name() + ".renameRunCycles")
|
.name(name() + ".RENAME:RunCycles")
|
||||||
.desc("Number of cycles rename is running")
|
.desc("Number of cycles rename is running")
|
||||||
.prereq(renameIdleCycles);
|
.prereq(renameIdleCycles);
|
||||||
renameUnblockCycles
|
renameUnblockCycles
|
||||||
.name(name() + ".renameUnblockCycles")
|
.name(name() + ".RENAME:UnblockCycles")
|
||||||
.desc("Number of cycles rename is unblocking")
|
.desc("Number of cycles rename is unblocking")
|
||||||
.prereq(renameUnblockCycles);
|
.prereq(renameUnblockCycles);
|
||||||
renameRenamedInsts
|
renameRenamedInsts
|
||||||
.name(name() + ".renameRenamedInsts")
|
.name(name() + ".RENAME:RenamedInsts")
|
||||||
.desc("Number of instructions processed by rename")
|
.desc("Number of instructions processed by rename")
|
||||||
.prereq(renameRenamedInsts);
|
.prereq(renameRenamedInsts);
|
||||||
renameSquashedInsts
|
renameSquashedInsts
|
||||||
.name(name() + ".renameSquashedInsts")
|
.name(name() + ".RENAME:SquashedInsts")
|
||||||
.desc("Number of squashed instructions processed by rename")
|
.desc("Number of squashed instructions processed by rename")
|
||||||
.prereq(renameSquashedInsts);
|
.prereq(renameSquashedInsts);
|
||||||
renameROBFullEvents
|
renameROBFullEvents
|
||||||
.name(name() + ".renameROBFullEvents")
|
.name(name() + ".RENAME:ROBFullEvents")
|
||||||
.desc("Number of times rename has blocked due to ROB full")
|
.desc("Number of times rename has blocked due to ROB full")
|
||||||
.prereq(renameROBFullEvents);
|
.prereq(renameROBFullEvents);
|
||||||
renameIQFullEvents
|
renameIQFullEvents
|
||||||
.name(name() + ".renameIQFullEvents")
|
.name(name() + ".RENAME:IQFullEvents")
|
||||||
.desc("Number of times rename has blocked due to IQ full")
|
.desc("Number of times rename has blocked due to IQ full")
|
||||||
.prereq(renameIQFullEvents);
|
.prereq(renameIQFullEvents);
|
||||||
renameLSQFullEvents
|
renameLSQFullEvents
|
||||||
.name(name() + ".renameLSQFullEvents")
|
.name(name() + ".RENAME:LSQFullEvents")
|
||||||
.desc("Number of times rename has blocked due to LSQ full")
|
.desc("Number of times rename has blocked due to LSQ full")
|
||||||
.prereq(renameLSQFullEvents);
|
.prereq(renameLSQFullEvents);
|
||||||
renameFullRegistersEvents
|
renameFullRegistersEvents
|
||||||
.name(name() + ".renameFullRegisterEvents")
|
.name(name() + ".RENAME:FullRegisterEvents")
|
||||||
.desc("Number of times there has been no free registers")
|
.desc("Number of times there has been no free registers")
|
||||||
.prereq(renameFullRegistersEvents);
|
.prereq(renameFullRegistersEvents);
|
||||||
renameRenamedOperands
|
renameRenamedOperands
|
||||||
.name(name() + ".renameRenamedOperands")
|
.name(name() + ".RENAME:RenamedOperands")
|
||||||
.desc("Number of destination operands rename has renamed")
|
.desc("Number of destination operands rename has renamed")
|
||||||
.prereq(renameRenamedOperands);
|
.prereq(renameRenamedOperands);
|
||||||
renameRenameLookups
|
renameRenameLookups
|
||||||
.name(name() + ".renameRenameLookups")
|
.name(name() + ".RENAME:RenameLookups")
|
||||||
.desc("Number of register rename lookups that rename has made")
|
.desc("Number of register rename lookups that rename has made")
|
||||||
.prereq(renameRenameLookups);
|
.prereq(renameRenameLookups);
|
||||||
renameCommittedMaps
|
renameCommittedMaps
|
||||||
.name(name() + ".renameCommittedMaps")
|
.name(name() + ".RENAME:CommittedMaps")
|
||||||
.desc("Number of HB maps that are committed")
|
.desc("Number of HB maps that are committed")
|
||||||
.prereq(renameCommittedMaps);
|
.prereq(renameCommittedMaps);
|
||||||
renameUndoneMaps
|
renameUndoneMaps
|
||||||
.name(name() + ".renameUndoneMaps")
|
.name(name() + ".RENAME:UndoneMaps")
|
||||||
.desc("Number of HB maps that are undone due to squashing")
|
.desc("Number of HB maps that are undone due to squashing")
|
||||||
.prereq(renameUndoneMaps);
|
.prereq(renameUndoneMaps);
|
||||||
|
renamedSerializing
|
||||||
|
.name(name() + ".RENAME:serializingInsts")
|
||||||
|
.desc("count of serializing insts renamed")
|
||||||
|
.flags(Stats::total)
|
||||||
|
;
|
||||||
|
renamedTempSerializing
|
||||||
|
.name(name() + ".RENAME:tempSerializingInsts")
|
||||||
|
.desc("count of temporary serializing insts renamed")
|
||||||
|
.flags(Stats::total)
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -254,7 +264,7 @@ DefaultRename<Impl>::squash(unsigned tid)
|
||||||
// cycle and there should be space to hold everything due to the squash.
|
// cycle and there should be space to hold everything due to the squash.
|
||||||
if (renameStatus[tid] == Blocked ||
|
if (renameStatus[tid] == Blocked ||
|
||||||
renameStatus[tid] == Unblocking ||
|
renameStatus[tid] == Unblocking ||
|
||||||
renameStatus[tid] == BarrierStall) {
|
renameStatus[tid] == SerializeStall) {
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
// In syscall emulation, we can have both a block and a squash due
|
// In syscall emulation, we can have both a block and a squash due
|
||||||
// to a syscall in the same cycle. This would cause both signals to
|
// to a syscall in the same cycle. This would cause both signals to
|
||||||
|
@ -267,7 +277,7 @@ DefaultRename<Impl>::squash(unsigned tid)
|
||||||
#else
|
#else
|
||||||
toDecode->renameUnblock[tid] = 1;
|
toDecode->renameUnblock[tid] = 1;
|
||||||
#endif
|
#endif
|
||||||
barrierInst[tid] = NULL;
|
serializeInst[tid] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the status to Squashing.
|
// Set the status to Squashing.
|
||||||
|
@ -370,8 +380,8 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
|
||||||
++renameBlockCycles;
|
++renameBlockCycles;
|
||||||
} else if (renameStatus[tid] == Squashing) {
|
} else if (renameStatus[tid] == Squashing) {
|
||||||
++renameSquashCycles;
|
++renameSquashCycles;
|
||||||
} else if (renameStatus[tid] == BarrierStall) {
|
} else if (renameStatus[tid] == SerializeStall) {
|
||||||
++renameBarrierCycles;
|
++renameSerializeStallCycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (renameStatus[tid] == Running ||
|
if (renameStatus[tid] == Running ||
|
||||||
|
@ -535,14 +545,18 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
|
||||||
if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
|
if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
|
||||||
DPRINTF(Rename, "Serialize before instruction encountered.\n");
|
DPRINTF(Rename, "Serialize before instruction encountered.\n");
|
||||||
|
|
||||||
if (!inst->isTempSerializeBefore())
|
if (!inst->isTempSerializeBefore()) {
|
||||||
|
renamedSerializing++;
|
||||||
inst->setSerializeHandled();
|
inst->setSerializeHandled();
|
||||||
|
} else {
|
||||||
|
renamedTempSerializing++;
|
||||||
|
}
|
||||||
|
|
||||||
// Change status over to BarrierStall so that other stages know
|
// Change status over to SerializeStall so that other stages know
|
||||||
// what this is blocked on.
|
// what this is blocked on.
|
||||||
renameStatus[tid] = BarrierStall;
|
renameStatus[tid] = SerializeStall;
|
||||||
|
|
||||||
barrierInst[tid] = inst;
|
serializeInst[tid] = inst;
|
||||||
|
|
||||||
blockThisCycle = true;
|
blockThisCycle = true;
|
||||||
|
|
||||||
|
@ -716,9 +730,9 @@ DefaultRename<Impl>::block(unsigned tid)
|
||||||
wroteToTimeBuffer = true;
|
wroteToTimeBuffer = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rename can not go from BarrierStall to Blocked, otherwise it would
|
// Rename can not go from SerializeStall to Blocked, otherwise it would
|
||||||
// not know to complete the barrier stall.
|
// not know to complete the serialize stall.
|
||||||
if (renameStatus[tid] != BarrierStall) {
|
if (renameStatus[tid] != SerializeStall) {
|
||||||
// Set status to Blocked.
|
// Set status to Blocked.
|
||||||
renameStatus[tid] = Blocked;
|
renameStatus[tid] = Blocked;
|
||||||
return true;
|
return true;
|
||||||
|
@ -735,7 +749,7 @@ DefaultRename<Impl>::unblock(unsigned tid)
|
||||||
DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
|
DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
|
||||||
|
|
||||||
// Rename is done unblocking if the skid buffer is empty.
|
// Rename is done unblocking if the skid buffer is empty.
|
||||||
if (skidBuffer[tid].empty() && renameStatus[tid] != BarrierStall) {
|
if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
|
||||||
|
|
||||||
DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
|
DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
|
||||||
|
|
||||||
|
@ -1008,9 +1022,9 @@ DefaultRename<Impl>::checkStall(unsigned tid)
|
||||||
} else if (renameMap[tid]->numFreeEntries() <= 0) {
|
} else if (renameMap[tid]->numFreeEntries() <= 0) {
|
||||||
DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
|
DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
|
||||||
ret_val = true;
|
ret_val = true;
|
||||||
} else if (renameStatus[tid] == BarrierStall &&
|
} else if (renameStatus[tid] == SerializeStall &&
|
||||||
(!emptyROB[tid] || instsInProgress[tid])) {
|
(!emptyROB[tid] || instsInProgress[tid])) {
|
||||||
DPRINTF(Rename,"[tid:%i]: Stall: Barrier stall and ROB is not "
|
DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
|
||||||
"empty.\n",
|
"empty.\n",
|
||||||
tid);
|
tid);
|
||||||
ret_val = true;
|
ret_val = true;
|
||||||
|
@ -1064,7 +1078,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
|
||||||
// if so then go to unblocking
|
// if so then go to unblocking
|
||||||
// If status was Squashing
|
// If status was Squashing
|
||||||
// check if squashing is not high. Switch to running this cycle.
|
// check if squashing is not high. Switch to running this cycle.
|
||||||
// If status was barrier stall
|
// If status was serialize stall
|
||||||
// check if ROB is empty and no insts are in flight to the ROB
|
// check if ROB is empty and no insts are in flight to the ROB
|
||||||
|
|
||||||
readFreeEntries(tid);
|
readFreeEntries(tid);
|
||||||
|
@ -1113,12 +1127,12 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (renameStatus[tid] == BarrierStall) {
|
if (renameStatus[tid] == SerializeStall) {
|
||||||
// Stall ends once the ROB is free.
|
// Stall ends once the ROB is free.
|
||||||
DPRINTF(Rename, "[tid:%u]: Done with barrier stall, switching to "
|
DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
|
||||||
"unblocking.\n", tid);
|
"unblocking.\n", tid);
|
||||||
|
|
||||||
DynInstPtr barr_inst = barrierInst[tid];
|
DynInstPtr serial_inst = serializeInst[tid];
|
||||||
|
|
||||||
renameStatus[tid] = Unblocking;
|
renameStatus[tid] = Unblocking;
|
||||||
|
|
||||||
|
@ -1126,21 +1140,21 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
|
||||||
|
|
||||||
DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
|
DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
|
||||||
"PC %#x.\n",
|
"PC %#x.\n",
|
||||||
tid, barr_inst->seqNum, barr_inst->readPC());
|
tid, serial_inst->seqNum, serial_inst->readPC());
|
||||||
|
|
||||||
// Put instruction into queue here.
|
// Put instruction into queue here.
|
||||||
barr_inst->clearSerializeBefore();
|
serial_inst->clearSerializeBefore();
|
||||||
|
|
||||||
if (!skidBuffer[tid].empty()) {
|
if (!skidBuffer[tid].empty()) {
|
||||||
skidBuffer[tid].push_front(barr_inst);
|
skidBuffer[tid].push_front(serial_inst);
|
||||||
} else {
|
} else {
|
||||||
insts[tid].push_front(barr_inst);
|
insts[tid].push_front(serial_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
|
DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
|
||||||
" Adding to front of list.", tid);
|
" Adding to front of list.", tid);
|
||||||
|
|
||||||
barrierInst[tid] = NULL;
|
serializeInst[tid] = NULL;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue