cpu: Change writeback modeling for outstanding instructions
As highlighted on the mailing list, gem5's writeback modeling can impact performance. This patch removes the limitation on the maximum number of outstanding issued instructions; however, the number of instructions that can write back in a single cycle is still respected in instToCommit().
This commit is contained in:
parent
fd722946dd
commit
976f27487b
7 changed files with 1 addition and 78 deletions
|
@ -126,7 +126,6 @@ class O3_ARM_v7a_3(DerivO3CPU):
|
||||||
dispatchWidth = 6
|
dispatchWidth = 6
|
||||||
issueWidth = 8
|
issueWidth = 8
|
||||||
wbWidth = 8
|
wbWidth = 8
|
||||||
wbDepth = 1
|
|
||||||
fuPool = O3_ARM_v7a_FUP()
|
fuPool = O3_ARM_v7a_FUP()
|
||||||
iewToCommitDelay = 1
|
iewToCommitDelay = 1
|
||||||
renameToROBDelay = 1
|
renameToROBDelay = 1
|
||||||
|
|
|
@ -84,7 +84,6 @@ class DerivO3CPU(BaseCPU):
|
||||||
dispatchWidth = Param.Unsigned(8, "Dispatch width")
|
dispatchWidth = Param.Unsigned(8, "Dispatch width")
|
||||||
issueWidth = Param.Unsigned(8, "Issue width")
|
issueWidth = Param.Unsigned(8, "Issue width")
|
||||||
wbWidth = Param.Unsigned(8, "Writeback width")
|
wbWidth = Param.Unsigned(8, "Writeback width")
|
||||||
wbDepth = Param.Unsigned(1, "Writeback depth")
|
|
||||||
fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")
|
fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")
|
||||||
|
|
||||||
iewToCommitDelay = Param.Cycles(1, "Issue/Execute/Writeback to commit "
|
iewToCommitDelay = Param.Cycles(1, "Issue/Execute/Writeback to commit "
|
||||||
|
|
|
@ -219,49 +219,6 @@ class DefaultIEW
|
||||||
/** Returns if the LSQ has any stores to writeback. */
|
/** Returns if the LSQ has any stores to writeback. */
|
||||||
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
|
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
|
||||||
|
|
||||||
void incrWb(InstSeqNum &sn)
|
|
||||||
{
|
|
||||||
++wbOutstanding;
|
|
||||||
if (wbOutstanding == wbMax)
|
|
||||||
ableToIssue = false;
|
|
||||||
DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
|
|
||||||
assert(wbOutstanding <= wbMax);
|
|
||||||
#ifdef DEBUG
|
|
||||||
wbList.insert(sn);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void decrWb(InstSeqNum &sn)
|
|
||||||
{
|
|
||||||
if (wbOutstanding == wbMax)
|
|
||||||
ableToIssue = true;
|
|
||||||
wbOutstanding--;
|
|
||||||
DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
|
|
||||||
assert(wbOutstanding >= 0);
|
|
||||||
#ifdef DEBUG
|
|
||||||
assert(wbList.find(sn) != wbList.end());
|
|
||||||
wbList.erase(sn);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::set<InstSeqNum> wbList;
|
|
||||||
|
|
||||||
void dumpWb()
|
|
||||||
{
|
|
||||||
std::set<InstSeqNum>::iterator wb_it = wbList.begin();
|
|
||||||
while (wb_it != wbList.end()) {
|
|
||||||
cprintf("[sn:%lli]\n",
|
|
||||||
(*wb_it));
|
|
||||||
wb_it++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bool canIssue() { return ableToIssue; }
|
|
||||||
|
|
||||||
bool ableToIssue;
|
|
||||||
|
|
||||||
/** Check misprediction */
|
/** Check misprediction */
|
||||||
void checkMisprediction(DynInstPtr &inst);
|
void checkMisprediction(DynInstPtr &inst);
|
||||||
|
|
||||||
|
@ -452,19 +409,9 @@ class DefaultIEW
|
||||||
*/
|
*/
|
||||||
unsigned wbCycle;
|
unsigned wbCycle;
|
||||||
|
|
||||||
/** Number of instructions in flight that will writeback. */
|
|
||||||
|
|
||||||
/** Number of instructions in flight that will writeback. */
|
|
||||||
int wbOutstanding;
|
|
||||||
|
|
||||||
/** Writeback width. */
|
/** Writeback width. */
|
||||||
unsigned wbWidth;
|
unsigned wbWidth;
|
||||||
|
|
||||||
/** Writeback width * writeback depth, where writeback depth is
|
|
||||||
* the number of cycles of writing back instructions that can be
|
|
||||||
* buffered. */
|
|
||||||
unsigned wbMax;
|
|
||||||
|
|
||||||
/** Number of active threads. */
|
/** Number of active threads. */
|
||||||
ThreadID numThreads;
|
ThreadID numThreads;
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params)
|
||||||
issueToExecuteDelay(params->issueToExecuteDelay),
|
issueToExecuteDelay(params->issueToExecuteDelay),
|
||||||
dispatchWidth(params->dispatchWidth),
|
dispatchWidth(params->dispatchWidth),
|
||||||
issueWidth(params->issueWidth),
|
issueWidth(params->issueWidth),
|
||||||
wbOutstanding(0),
|
|
||||||
wbWidth(params->wbWidth),
|
wbWidth(params->wbWidth),
|
||||||
numThreads(params->numThreads)
|
numThreads(params->numThreads)
|
||||||
{
|
{
|
||||||
|
@ -109,12 +108,8 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params)
|
||||||
fetchRedirect[tid] = false;
|
fetchRedirect[tid] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
wbMax = wbWidth * params->wbDepth;
|
|
||||||
|
|
||||||
updateLSQNextCycle = false;
|
updateLSQNextCycle = false;
|
||||||
|
|
||||||
ableToIssue = true;
|
|
||||||
|
|
||||||
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
|
skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,8 +630,6 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
||||||
++wbCycle;
|
++wbCycle;
|
||||||
wbNumInst = 0;
|
wbNumInst = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
|
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
|
||||||
|
@ -1263,7 +1256,6 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
|
|
||||||
++iewExecSquashedInsts;
|
++iewExecSquashedInsts;
|
||||||
|
|
||||||
decrWb(inst->seqNum);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1502,8 +1494,6 @@ DefaultIEW<Impl>::writebackInsts()
|
||||||
}
|
}
|
||||||
writebackCount[tid]++;
|
writebackCount[tid]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
decrWb(inst->seqNum);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -756,7 +756,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
int total_issued = 0;
|
int total_issued = 0;
|
||||||
|
|
||||||
while (total_issued < (totalWidth - total_deferred_mem_issued) &&
|
while (total_issued < (totalWidth - total_deferred_mem_issued) &&
|
||||||
iewStage->canIssue() &&
|
|
||||||
order_it != order_end_it) {
|
order_it != order_end_it) {
|
||||||
OpClass op_class = (*order_it).queueType;
|
OpClass op_class = (*order_it).queueType;
|
||||||
|
|
||||||
|
@ -861,7 +860,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
listOrder.erase(order_it++);
|
listOrder.erase(order_it++);
|
||||||
statIssuedInstType[tid][op_class]++;
|
statIssuedInstType[tid][op_class]++;
|
||||||
iewStage->incrWb(issuing_inst->seqNum);
|
|
||||||
} else {
|
} else {
|
||||||
statFuBusy[op_class]++;
|
statFuBusy[op_class]++;
|
||||||
fuBusy[tid]++;
|
fuBusy[tid]++;
|
||||||
|
|
|
@ -762,7 +762,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
// Tell IQ/mem dep unit that this instruction will need to be
|
// Tell IQ/mem dep unit that this instruction will need to be
|
||||||
// rescheduled eventually
|
// rescheduled eventually
|
||||||
iewStage->rescheduleMemInst(load_inst);
|
iewStage->rescheduleMemInst(load_inst);
|
||||||
iewStage->decrWb(load_inst->seqNum);
|
|
||||||
load_inst->clearIssued();
|
load_inst->clearIssued();
|
||||||
++lsqRescheduledLoads;
|
++lsqRescheduledLoads;
|
||||||
|
|
||||||
|
@ -889,12 +888,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||||
|
|
||||||
++lsqCacheBlocked;
|
++lsqCacheBlocked;
|
||||||
|
|
||||||
// If the first part of a split access succeeds, then let the LSQ
|
|
||||||
// handle the decrWb when completeDataAccess is called upon return
|
|
||||||
// of the requested first part of data
|
|
||||||
if (!completedFirst)
|
|
||||||
iewStage->decrWb(load_inst->seqNum);
|
|
||||||
|
|
||||||
// There's an older load that's already going to squash.
|
// There's an older load that's already going to squash.
|
||||||
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
|
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
|
||||||
return NoFault;
|
return NoFault;
|
||||||
|
|
|
@ -109,9 +109,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(!cpu->switchedOut());
|
assert(!cpu->switchedOut());
|
||||||
if (inst->isSquashed()) {
|
if (!inst->isSquashed()) {
|
||||||
iewStage->decrWb(inst->seqNum);
|
|
||||||
} else {
|
|
||||||
if (!state->noWB) {
|
if (!state->noWB) {
|
||||||
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
|
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
|
||||||
!state->isLoad) {
|
!state->isLoad) {
|
||||||
|
@ -1130,7 +1128,6 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
|
||||||
|
|
||||||
// Squashed instructions do not need to complete their access.
|
// Squashed instructions do not need to complete their access.
|
||||||
if (inst->isSquashed()) {
|
if (inst->isSquashed()) {
|
||||||
iewStage->decrWb(inst->seqNum);
|
|
||||||
assert(!inst->isStore());
|
assert(!inst->isStore());
|
||||||
++lsqIgnoredResponses;
|
++lsqIgnoredResponses;
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in a new issue