o3-delay-slot-bpred: fix decode stage handling of unconditional branches. The decode stage was not setting the predicted PC correctly or passing that information back to fetch correctly.
This commit is contained in:
parent
cc9e834e93
commit
5c1742b822
7 changed files with 86 additions and 25 deletions
|
@ -177,3 +177,5 @@ TraceFlag('Quiesce')
|
||||||
|
|
||||||
CompoundFlag('Exec', [ 'ExecEnable', 'ExecTicks', 'ExecOpClass', 'ExecThread',
|
CompoundFlag('Exec', [ 'ExecEnable', 'ExecTicks', 'ExecOpClass', 'ExecThread',
|
||||||
'ExecEffAddr', 'ExecResult', 'ExecSymbol', 'ExecMicro' ])
|
'ExecEffAddr', 'ExecResult', 'ExecSymbol', 'ExecMicro' ])
|
||||||
|
CompoundFlag('ExecNoTicks', [ 'ExecEnable', 'ExecOpClass', 'ExecThread',
|
||||||
|
'ExecEffAddr', 'ExecResult', 'ExecMicro' ])
|
||||||
|
|
|
@ -188,6 +188,10 @@ class BPredUnit
|
||||||
wasCall(0), bpHistory(bp_history)
|
wasCall(0), bpHistory(bp_history)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
|
bool operator==(const PredictorHistory &entry) const {
|
||||||
|
return this->seqNum == entry.seqNum;
|
||||||
|
}
|
||||||
|
|
||||||
/** The sequence number for the predictor history entry. */
|
/** The sequence number for the predictor history entry. */
|
||||||
InstSeqNum seqNum;
|
InstSeqNum seqNum;
|
||||||
|
|
||||||
|
@ -220,6 +224,7 @@ class BPredUnit
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::list<PredictorHistory> History;
|
typedef std::list<PredictorHistory> History;
|
||||||
|
typedef typename History::iterator HistoryIt;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The per-thread predictor history. This is used to update the predictor
|
* The per-thread predictor history. This is used to update the predictor
|
||||||
|
|
|
@ -36,6 +36,8 @@
|
||||||
|
|
||||||
#include "params/DerivO3CPU.hh"
|
#include "params/DerivO3CPU.hh"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
|
BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
|
||||||
: _name(params->name + ".BPredUnit"),
|
: _name(params->name + ".BPredUnit"),
|
||||||
|
@ -173,6 +175,10 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
tid, pred_taken, inst->readPC());
|
tid, pred_taken, inst->readPC());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history "
|
||||||
|
"for PC %#x\n",
|
||||||
|
tid, inst->seqNum, inst->readPC());
|
||||||
|
|
||||||
PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
|
PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
|
||||||
bp_history, tid);
|
bp_history, tid);
|
||||||
|
|
||||||
|
@ -240,7 +246,8 @@ BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
|
|
||||||
predHist[tid].push_front(predict_record);
|
predHist[tid].push_front(predict_record);
|
||||||
|
|
||||||
DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size());
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added."
|
||||||
|
"predHist.size(): %i\n", tid, inst->seqNum, predHist[tid].size());
|
||||||
|
|
||||||
return pred_taken;
|
return pred_taken;
|
||||||
}
|
}
|
||||||
|
@ -249,7 +256,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
||||||
{
|
{
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until "
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until "
|
||||||
"[sn:%lli].\n", tid, done_sn);
|
"[sn:%lli].\n", tid, done_sn);
|
||||||
|
|
||||||
while (!predHist[tid].empty() &&
|
while (!predHist[tid].empty() &&
|
||||||
|
@ -290,7 +297,12 @@ BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
|
||||||
// This call should delete the bpHistory.
|
// This call should delete the bpHistory.
|
||||||
BPSquash(pred_hist.front().bpHistory);
|
BPSquash(pred_hist.front().bpHistory);
|
||||||
|
|
||||||
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
|
||||||
|
"PC %#x.\n", tid, pred_hist.front().seqNum, pred_hist.front().PC);
|
||||||
|
|
||||||
pred_hist.pop_front();
|
pred_hist.pop_front();
|
||||||
|
|
||||||
|
DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -305,6 +317,13 @@ BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||||
// Now that we know that a branch was mispredicted, we need to undo
|
// Now that we know that a branch was mispredicted, we need to undo
|
||||||
// all the branches that have been seen up until this branch and
|
// all the branches that have been seen up until this branch and
|
||||||
// fix up everything.
|
// fix up everything.
|
||||||
|
// NOTE: This should conceivably be called in 2 scenarios:
|
||||||
|
// (1) After a branch is executed, it updates its status in the ROB
|
||||||
|
// The commit stage then checks the ROB update and sends a signal to
|
||||||
|
// the fetch stage to squash history after the mispredict
|
||||||
|
// (2) In the decode stage, you can find out early if an unconditional
|
||||||
|
// PC-relative branch was predicted incorrectly. If so, a signal
|
||||||
|
// to the fetch stage is sent to squash history after the mispredict
|
||||||
|
|
||||||
History &pred_hist = predHist[tid];
|
History &pred_hist = predHist[tid];
|
||||||
|
|
||||||
|
@ -314,22 +333,42 @@ BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||||
"setting target to %#x.\n",
|
"setting target to %#x.\n",
|
||||||
tid, squashed_sn, corr_target);
|
tid, squashed_sn, corr_target);
|
||||||
|
|
||||||
|
// Squash All Branches AFTER this mispredicted branch
|
||||||
squash(squashed_sn, tid);
|
squash(squashed_sn, tid);
|
||||||
|
|
||||||
// If there's a squash due to a syscall, there may not be an entry
|
// If there's a squash due to a syscall, there may not be an entry
|
||||||
// corresponding to the squash. In that case, don't bother trying to
|
// corresponding to the squash. In that case, don't bother trying to
|
||||||
// fix up the entry.
|
// fix up the entry.
|
||||||
if (!pred_hist.empty()) {
|
if (!pred_hist.empty()) {
|
||||||
|
|
||||||
|
HistoryIt hist_it = pred_hist.begin();
|
||||||
|
//HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
|
||||||
|
// squashed_sn);
|
||||||
|
|
||||||
|
//assert(hist_it != pred_hist.end());
|
||||||
|
if (pred_hist.front().seqNum != squashed_sn) {
|
||||||
|
DPRINTF(Fetch, "Front sn %i != Squash sn %i\n",
|
||||||
|
pred_hist.front().seqNum, squashed_sn);
|
||||||
|
|
||||||
assert(pred_hist.front().seqNum == squashed_sn);
|
assert(pred_hist.front().seqNum == squashed_sn);
|
||||||
if (pred_hist.front().usedRAS) {
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ((*hist_it).usedRAS) {
|
||||||
++RASIncorrect;
|
++RASIncorrect;
|
||||||
}
|
}
|
||||||
|
|
||||||
BPUpdate(pred_hist.front().PC, actually_taken,
|
BPUpdate((*hist_it).PC, actually_taken,
|
||||||
pred_hist.front().bpHistory);
|
pred_hist.front().bpHistory);
|
||||||
|
|
||||||
BTB.update(pred_hist.front().PC, corr_target, tid);
|
BTB.update((*hist_it).PC, corr_target, tid);
|
||||||
pred_hist.pop_front();
|
|
||||||
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
|
||||||
|
"PC %#x.\n", tid, (*hist_it).seqNum, (*hist_it).PC);
|
||||||
|
|
||||||
|
pred_hist.erase(hist_it);
|
||||||
|
|
||||||
|
DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,7 +425,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
BPredUnit<Impl>::dump()
|
BPredUnit<Impl>::dump()
|
||||||
{
|
{
|
||||||
typename History::iterator pred_hist_it;
|
HistoryIt pred_hist_it;
|
||||||
|
|
||||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
if (!predHist[i].empty()) {
|
if (!predHist[i].empty()) {
|
||||||
|
|
|
@ -262,28 +262,30 @@ template<class Impl>
|
||||||
void
|
void
|
||||||
DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
||||||
{
|
{
|
||||||
DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
|
DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch prediction "
|
||||||
"detected at decode.\n", tid);
|
"detected at decode.\n", tid, inst->seqNum);
|
||||||
|
|
||||||
// Send back mispredict information.
|
// Send back mispredict information.
|
||||||
toFetch->decodeInfo[tid].branchMispredict = true;
|
toFetch->decodeInfo[tid].branchMispredict = true;
|
||||||
toFetch->decodeInfo[tid].predIncorrect = true;
|
toFetch->decodeInfo[tid].predIncorrect = true;
|
||||||
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
|
||||||
toFetch->decodeInfo[tid].squash = true;
|
toFetch->decodeInfo[tid].squash = true;
|
||||||
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
|
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
||||||
///FIXME There needs to be a way to set the nextPC and nextNPC
|
|
||||||
///explicitly for ISAs with delay slots.
|
|
||||||
toFetch->decodeInfo[tid].nextNPC =
|
|
||||||
inst->branchTarget() + sizeof(TheISA::MachInst);
|
|
||||||
toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
|
toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
|
||||||
|
|
||||||
#if ISA_HAS_DELAY_SLOT
|
#if ISA_HAS_DELAY_SLOT
|
||||||
|
toFetch->decodeInfo[tid].nextPC = inst->readPC() + sizeof(TheISA::MachInst);
|
||||||
|
toFetch->decodeInfo[tid].nextNPC = inst->branchTarget();
|
||||||
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
|
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
|
||||||
(inst->readNextPC() + sizeof(TheISA::MachInst));
|
(inst->readNextPC() + sizeof(TheISA::MachInst));
|
||||||
#else
|
#else
|
||||||
|
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
|
||||||
|
toFetch->decodeInfo[tid].nextNPC =
|
||||||
|
inst->branchTarget() + sizeof(TheISA::MachInst);
|
||||||
toFetch->decodeInfo[tid].branchTaken =
|
toFetch->decodeInfo[tid].branchTaken =
|
||||||
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
|
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
InstSeqNum squash_seq_num = inst->seqNum;
|
InstSeqNum squash_seq_num = inst->seqNum;
|
||||||
|
|
||||||
// Might have to tell fetch to unblock.
|
// Might have to tell fetch to unblock.
|
||||||
|
@ -738,8 +740,19 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
|
||||||
// a check at the end
|
// a check at the end
|
||||||
squash(inst, inst->threadNumber);
|
squash(inst, inst->threadNumber);
|
||||||
Addr target = inst->branchTarget();
|
Addr target = inst->branchTarget();
|
||||||
|
|
||||||
|
#if ISA_HAS_DELAY_SLOT
|
||||||
|
DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x PredNextPC: %#x\n",
|
||||||
|
inst->seqNum, inst->readPC() + sizeof(TheISA::MachInst), target);
|
||||||
|
|
||||||
|
//The micro pc after an instruction level branch should be 0
|
||||||
|
inst->setPredTarg(inst->readPC() + sizeof(TheISA::MachInst), target, 0);
|
||||||
|
#else
|
||||||
|
DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %#x PredNextPC: %#x\n",
|
||||||
|
inst->seqNum, target, target + sizeof(TheISA::MachInst));
|
||||||
//The micro pc after an instruction level branch should be 0
|
//The micro pc after an instruction level branch should be 0
|
||||||
inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
|
inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -524,12 +524,13 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
||||||
Addr pred_PC = next_PC;
|
Addr pred_PC = next_PC;
|
||||||
predict_taken = branchPred.predict(inst, pred_PC, tid);
|
predict_taken = branchPred.predict(inst, pred_PC, tid);
|
||||||
|
|
||||||
/* if (predict_taken) {
|
if (predict_taken) {
|
||||||
DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
|
DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %#x.\n",
|
||||||
tid, pred_PC);
|
tid, inst->seqNum, pred_PC);
|
||||||
} else {
|
} else {
|
||||||
DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
|
DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
|
||||||
}*/
|
tid, inst->seqNum);
|
||||||
|
}
|
||||||
|
|
||||||
#if ISA_HAS_DELAY_SLOT
|
#if ISA_HAS_DELAY_SLOT
|
||||||
next_PC = next_NPC;
|
next_PC = next_NPC;
|
||||||
|
@ -544,8 +545,9 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
||||||
next_PC += instSize;
|
next_PC += instSize;
|
||||||
next_NPC = next_PC + instSize;
|
next_NPC = next_PC + instSize;
|
||||||
#endif
|
#endif
|
||||||
/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
|
|
||||||
tid, next_PC, next_NPC);*/
|
DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %#x and then %#x.\n",
|
||||||
|
tid, inst->seqNum, next_PC, next_NPC);
|
||||||
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
||||||
inst->setPredTaken(predict_taken);
|
inst->setPredTaken(predict_taken);
|
||||||
|
|
||||||
|
|
|
@ -1282,7 +1282,7 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
fetchRedirect[tid] = true;
|
fetchRedirect[tid] = true;
|
||||||
|
|
||||||
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
|
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
|
||||||
DPRINTF(IEW, "Predicted target was %#x, %#x.\n",
|
DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
|
||||||
inst->readPredPC(), inst->readPredNPC());
|
inst->readPredPC(), inst->readPredNPC());
|
||||||
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
|
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x,"
|
||||||
" NPC: %#x.\n", inst->readNextPC(),
|
" NPC: %#x.\n", inst->readNextPC(),
|
||||||
|
|
Loading…
Reference in a new issue