Add support for microcode and pull out the special branch delay slot handling. Branch delay slots need to be squashed on a mispredict as well because the nnpc they saw was incorrect.
--HG-- extra : convert_revision : 8b9c603616bcad254417a7a3fa3edfb4c8728719
This commit is contained in:
parent
5a3dcc172a
commit
c3081d9c1c
|
@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** PC of this instruction. */
|
||||
Addr PC;
|
||||
|
||||
/** Micro PC of this instruction. */
|
||||
Addr microPC;
|
||||
|
||||
protected:
|
||||
/** Next non-speculative PC. It is not filled in at fetch, but rather
|
||||
* once the target of the branch is truly known (either decode or
|
||||
|
@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Next non-speculative NPC. Target PC for Mips or Sparc. */
|
||||
Addr nextNPC;
|
||||
|
||||
/** Next non-speculative micro PC. */
|
||||
Addr nextMicroPC;
|
||||
|
||||
/** Predicted next PC. */
|
||||
Addr predPC;
|
||||
|
||||
/** Predicted next NPC. */
|
||||
Addr predNPC;
|
||||
|
||||
/** Predicted next microPC */
|
||||
Addr predMicroPC;
|
||||
|
||||
/** If this is a branch that was predicted taken */
|
||||
bool predTaken;
|
||||
|
||||
|
@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
{
|
||||
_flatDestRegIdx[idx] = flattened_dest;
|
||||
}
|
||||
/** BaseDynInst constructor given a static instruction and its PC state.
|
||||
* @param staticInst A StaticInstPtr to the underlying instruction.
|
||||
* @param PC The PC of the instruction.
|
||||
* @param pred_PC The predicted next PC.
|
||||
* @param pred_NPC The predicted next NPC.
|
||||
* @param seq_num The sequence number of the instruction.
|
||||
* @param cpu Pointer to the instruction's CPU.
|
||||
*/
|
||||
BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction.
|
||||
* @param inst The binary instruction.
|
||||
|
@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
* @param seq_num The sequence number of the instruction.
|
||||
* @param cpu Pointer to the instruction's CPU.
|
||||
*/
|
||||
BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a StaticInst pointer.
|
||||
|
@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
#endif
|
||||
}
|
||||
|
||||
Addr readNextMicroPC()
|
||||
{
|
||||
return nextMicroPC;
|
||||
}
|
||||
|
||||
/** Set the predicted target of this current instruction. */
|
||||
void setPredTarg(Addr predicted_PC, Addr predicted_NPC)
|
||||
void setPredTarg(Addr predicted_PC, Addr predicted_NPC,
|
||||
Addr predicted_MicroPC)
|
||||
{
|
||||
predPC = predicted_PC;
|
||||
predNPC = predicted_NPC;
|
||||
predMicroPC = predicted_MicroPC;
|
||||
}
|
||||
|
||||
/** Returns the predicted PC immediately after the branch. */
|
||||
|
@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Returns the predicted PC two instructions after the branch */
|
||||
Addr readPredNPC() { return predNPC; }
|
||||
|
||||
/** Returns the predicted micro PC after the branch */
|
||||
Addr readPredMicroPC() { return predMicroPC; }
|
||||
|
||||
/** Returns whether the instruction was predicted taken or not. */
|
||||
bool readPredTaken()
|
||||
{
|
||||
|
@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
bool mispredicted()
|
||||
{
|
||||
return readPredPC() != readNextPC() ||
|
||||
readPredNPC() != readNextNPC();
|
||||
readPredNPC() != readNextNPC() ||
|
||||
readPredMicroPC() != readNextMicroPC();
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
bool isQuiesce() const { return staticInst->isQuiesce(); }
|
||||
bool isIprAccess() const { return staticInst->isIprAccess(); }
|
||||
bool isUnverifiable() const { return staticInst->isUnverifiable(); }
|
||||
bool isMacroOp() const { return staticInst->isMacroOp(); }
|
||||
bool isMicroOp() const { return staticInst->isMicroOp(); }
|
||||
bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
|
||||
bool isLastMicroOp() const { return staticInst->isLastMicroOp(); }
|
||||
bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); }
|
||||
bool isMicroBranch() const { return staticInst->isMicroBranch(); }
|
||||
|
||||
/** Temporarily sets this instruction as a serialize before instruction. */
|
||||
void setSerializeBefore() { status.set(SerializeBefore); }
|
||||
|
@ -700,20 +737,28 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Read the PC of this instruction. */
|
||||
const Addr readPC() const { return PC; }
|
||||
|
||||
/**Read the micro PC of this instruction. */
|
||||
const Addr readMicroPC() const { return microPC; }
|
||||
|
||||
/** Set the next PC of this instruction (its actual target). */
|
||||
void setNextPC(uint64_t val)
|
||||
void setNextPC(Addr val)
|
||||
{
|
||||
nextPC = val;
|
||||
}
|
||||
|
||||
/** Set the next NPC of this instruction (the target in Mips or Sparc).*/
|
||||
void setNextNPC(uint64_t val)
|
||||
void setNextNPC(Addr val)
|
||||
{
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextNPC = val;
|
||||
#endif
|
||||
}
|
||||
|
||||
void setNextMicroPC(Addr val)
|
||||
{
|
||||
nextMicroPC = val;
|
||||
}
|
||||
|
||||
/** Sets the ASID. */
|
||||
void setASID(short addr_space_id) { asid = addr_space_id; }
|
||||
|
||||
|
|
|
@ -62,19 +62,66 @@ my_hash_t thishash;
|
|||
#endif
|
||||
|
||||
template <class Impl>
|
||||
BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst,
|
||||
BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
|
||||
Addr inst_PC, Addr inst_NPC,
|
||||
Addr inst_MicroPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu)
|
||||
: staticInst(machInst), traceData(NULL), cpu(cpu)
|
||||
: staticInst(_staticInst), traceData(NULL), cpu(cpu)
|
||||
{
|
||||
seqNum = seq_num;
|
||||
|
||||
bool nextIsMicro =
|
||||
staticInst->isMicroOp() && !staticInst->isLastMicroOp();
|
||||
|
||||
PC = inst_PC;
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
microPC = inst_MicroPC;
|
||||
if (nextIsMicro) {
|
||||
nextPC = inst_PC;
|
||||
nextNPC = inst_NPC;
|
||||
nextMicroPC = microPC + 1;
|
||||
} else {
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
nextMicroPC = 0;
|
||||
}
|
||||
predPC = pred_PC;
|
||||
predNPC = pred_NPC;
|
||||
predMicroPC = pred_MicroPC;
|
||||
predTaken = false;
|
||||
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst inst,
|
||||
Addr inst_PC, Addr inst_NPC,
|
||||
Addr inst_MicroPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu)
|
||||
: staticInst(inst), traceData(NULL), cpu(cpu)
|
||||
{
|
||||
seqNum = seq_num;
|
||||
|
||||
bool nextIsMicro =
|
||||
staticInst->isMicroOp() && !staticInst->isLastMicroOp();
|
||||
|
||||
PC = inst_PC;
|
||||
microPC = inst_MicroPC;
|
||||
if (nextIsMicro) {
|
||||
nextPC = inst_PC;
|
||||
nextNPC = inst_NPC;
|
||||
nextMicroPC = microPC + 1;
|
||||
} else {
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
nextMicroPC = 0;
|
||||
}
|
||||
predPC = pred_PC;
|
||||
predNPC = pred_NPC;
|
||||
predMicroPC = pred_MicroPC;
|
||||
predTaken = false;
|
||||
|
||||
initVars();
|
||||
|
|
|
@ -87,9 +87,10 @@ struct DefaultIEWDefaultCommit {
|
|||
bool squash[Impl::MaxThreads];
|
||||
bool branchMispredict[Impl::MaxThreads];
|
||||
bool branchTaken[Impl::MaxThreads];
|
||||
uint64_t mispredPC[Impl::MaxThreads];
|
||||
uint64_t nextPC[Impl::MaxThreads];
|
||||
uint64_t nextNPC[Impl::MaxThreads];
|
||||
Addr mispredPC[Impl::MaxThreads];
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
Addr nextMicroPC[Impl::MaxThreads];
|
||||
InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
||||
|
||||
bool includeSquashInst[Impl::MaxThreads];
|
||||
|
@ -118,9 +119,10 @@ struct TimeBufStruct {
|
|||
// struct as it is used pretty frequently.
|
||||
bool branchMispredict;
|
||||
bool branchTaken;
|
||||
uint64_t mispredPC;
|
||||
uint64_t nextPC;
|
||||
uint64_t nextNPC;
|
||||
Addr mispredPC;
|
||||
Addr nextPC;
|
||||
Addr nextNPC;
|
||||
Addr nextMicroPC;
|
||||
|
||||
unsigned branchCount;
|
||||
};
|
||||
|
@ -158,9 +160,10 @@ struct TimeBufStruct {
|
|||
|
||||
bool branchMispredict;
|
||||
bool branchTaken;
|
||||
uint64_t mispredPC;
|
||||
uint64_t nextPC;
|
||||
uint64_t nextNPC;
|
||||
Addr mispredPC;
|
||||
Addr nextPC;
|
||||
Addr nextNPC;
|
||||
Addr nextMicroPC;
|
||||
|
||||
// Represents the instruction that has either been retired or
|
||||
// squashed. Similar to having a single bus that broadcasts the
|
||||
|
|
|
@ -279,25 +279,37 @@ class DefaultCommit
|
|||
/** Returns the PC of the head instruction of the ROB.
|
||||
* @todo: Probably remove this function as it returns only thread 0.
|
||||
*/
|
||||
uint64_t readPC() { return PC[0]; }
|
||||
Addr readPC() { return PC[0]; }
|
||||
|
||||
/** Returns the PC of a specific thread. */
|
||||
uint64_t readPC(unsigned tid) { return PC[tid]; }
|
||||
Addr readPC(unsigned tid) { return PC[tid]; }
|
||||
|
||||
/** Sets the PC of a specific thread. */
|
||||
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
|
||||
void setPC(Addr val, unsigned tid) { PC[tid] = val; }
|
||||
|
||||
/** Reads the micro PC of a specific thread. */
|
||||
Addr readMicroPC(unsigned tid) { return microPC[tid]; }
|
||||
|
||||
/** Sets the micro PC of a specific thread */
|
||||
void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; }
|
||||
|
||||
/** Reads the next PC of a specific thread. */
|
||||
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
|
||||
Addr readNextPC(unsigned tid) { return nextPC[tid]; }
|
||||
|
||||
/** Sets the next PC of a specific thread. */
|
||||
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
|
||||
void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; }
|
||||
|
||||
/** Reads the next NPC of a specific thread. */
|
||||
uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
|
||||
Addr readNextNPC(unsigned tid) { return nextNPC[tid]; }
|
||||
|
||||
/** Sets the next NPC of a specific thread. */
|
||||
void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
|
||||
void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; }
|
||||
|
||||
/** Reads the next micro PC of a specific thread. */
|
||||
Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; }
|
||||
|
||||
/** Sets the next micro PC of a specific thread. */
|
||||
void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; }
|
||||
|
||||
private:
|
||||
/** Time buffer interface. */
|
||||
|
@ -402,12 +414,20 @@ class DefaultCommit
|
|||
*/
|
||||
Addr PC[Impl::MaxThreads];
|
||||
|
||||
/** The commit micro PC of each thread. Refers to the instruction that
|
||||
* is currently being processed/committed.
|
||||
*/
|
||||
Addr microPC[Impl::MaxThreads];
|
||||
|
||||
/** The next PC of each thread. */
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
|
||||
/** The next NPC of each thread. */
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
|
||||
/** The next micro PC of each thread. */
|
||||
Addr nextMicroPC[Impl::MaxThreads];
|
||||
|
||||
/** The sequence number of the youngest valid instruction in the ROB. */
|
||||
InstSeqNum youngestSeqNum[Impl::MaxThreads];
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
|
|||
committedStores[i] = false;
|
||||
trapSquash[i] = false;
|
||||
tcSquash[i] = false;
|
||||
PC[i] = nextPC[i] = nextNPC[i] = 0;
|
||||
microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0;
|
||||
}
|
||||
#if FULL_SYSTEM
|
||||
interrupt = NoFault;
|
||||
|
@ -508,6 +508,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
|
|||
|
||||
toIEW->commitInfo[tid].nextPC = PC[tid];
|
||||
toIEW->commitInfo[tid].nextNPC = nextPC[tid];
|
||||
toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid];
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -768,6 +769,7 @@ DefaultCommit<Impl>::commit()
|
|||
|
||||
toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
|
||||
toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
|
||||
toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid];
|
||||
|
||||
toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
|
||||
|
||||
|
@ -877,6 +879,7 @@ DefaultCommit<Impl>::commitInsts()
|
|||
PC[tid] = head_inst->readPC();
|
||||
nextPC[tid] = head_inst->readNextPC();
|
||||
nextNPC[tid] = head_inst->readNextNPC();
|
||||
nextMicroPC[tid] = head_inst->readNextMicroPC();
|
||||
|
||||
// Increment the total number of non-speculative instructions
|
||||
// executed.
|
||||
|
@ -905,12 +908,10 @@ DefaultCommit<Impl>::commitInsts()
|
|||
}
|
||||
|
||||
PC[tid] = nextPC[tid];
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextPC[tid] = nextNPC[tid];
|
||||
nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
|
||||
#else
|
||||
nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
microPC[tid] = nextMicroPC[tid];
|
||||
nextMicroPC[tid] = microPC[tid] + 1;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
int count = 0;
|
||||
|
|
|
@ -696,7 +696,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
|
|||
|
||||
// Squash Throughout Pipeline
|
||||
InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
|
||||
fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, tid);
|
||||
fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid);
|
||||
decode.squash(tid);
|
||||
rename.squash(squash_seq_num, tid);
|
||||
iew.squash(tid);
|
||||
|
@ -1150,6 +1150,20 @@ FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
|
|||
commit.setPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readMicroPC(unsigned tid)
|
||||
{
|
||||
return commit.readMicroPC(tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::setMicroPC(Addr new_PC,unsigned tid)
|
||||
{
|
||||
commit.setMicroPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readNextPC(unsigned tid)
|
||||
|
@ -1178,6 +1192,20 @@ FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
|
|||
commit.setNextNPC(val, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readNextMicroPC(unsigned tid)
|
||||
{
|
||||
return commit.readNextMicroPC(tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::setNextMicroPC(Addr new_PC,unsigned tid)
|
||||
{
|
||||
commit.setNextMicroPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename FullO3CPU<Impl>::ListIt
|
||||
FullO3CPU<Impl>::addInst(DynInstPtr &inst)
|
||||
|
|
|
@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU
|
|||
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
|
||||
|
||||
/** Reads the commit PC of a specific thread. */
|
||||
uint64_t readPC(unsigned tid);
|
||||
Addr readPC(unsigned tid);
|
||||
|
||||
/** Sets the commit PC of a specific thread. */
|
||||
void setPC(Addr new_PC, unsigned tid);
|
||||
|
||||
/** Reads the commit micro PC of a specific thread. */
|
||||
Addr readMicroPC(unsigned tid);
|
||||
|
||||
/** Sets the commit micro PC of a specific thread. */
|
||||
void setMicroPC(Addr new_microPC, unsigned tid);
|
||||
|
||||
/** Reads the next PC of a specific thread. */
|
||||
uint64_t readNextPC(unsigned tid);
|
||||
Addr readNextPC(unsigned tid);
|
||||
|
||||
/** Sets the next PC of a specific thread. */
|
||||
void setNextPC(uint64_t val, unsigned tid);
|
||||
void setNextPC(Addr val, unsigned tid);
|
||||
|
||||
/** Reads the next NPC of a specific thread. */
|
||||
uint64_t readNextNPC(unsigned tid);
|
||||
Addr readNextNPC(unsigned tid);
|
||||
|
||||
/** Sets the next NPC of a specific thread. */
|
||||
void setNextNPC(uint64_t val, unsigned tid);
|
||||
void setNextNPC(Addr val, unsigned tid);
|
||||
|
||||
/** Reads the commit next micro PC of a specific thread. */
|
||||
Addr readNextMicroPC(unsigned tid);
|
||||
|
||||
/** Sets the commit next micro PC of a specific thread. */
|
||||
void setNextMicroPC(Addr val, unsigned tid);
|
||||
|
||||
/** Function to add instruction onto the head of the list of the
|
||||
* instructions. Used when new instructions are fetched.
|
||||
|
|
|
@ -273,6 +273,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
|||
///explicitly for ISAs with delay slots.
|
||||
toFetch->decodeInfo[tid].nextNPC =
|
||||
inst->branchTarget() + sizeof(TheISA::MachInst);
|
||||
toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
|
||||
(inst->readNextPC() + sizeof(TheISA::MachInst));
|
||||
|
@ -735,7 +736,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
|
|||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
Addr target = inst->branchTarget();
|
||||
inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -227,7 +227,7 @@ class DefaultFetch
|
|||
* @param next_NPC Used for ISAs which use delay slots.
|
||||
* @return Whether or not a branch was predicted as taken.
|
||||
*/
|
||||
bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
|
||||
bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC);
|
||||
|
||||
/**
|
||||
* Fetches the cache line that contains fetch_PC. Returns any
|
||||
|
@ -242,12 +242,14 @@ class DefaultFetch
|
|||
bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. */
|
||||
inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
|
||||
inline void doSquash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC, unsigned tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
||||
* remove any instructions between fetch and decode that should be squashed.
|
||||
*/
|
||||
void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid);
|
||||
|
||||
/** Checks if a thread is stalled. */
|
||||
|
@ -263,6 +265,7 @@ class DefaultFetch
|
|||
* squash should be the commit stage.
|
||||
*/
|
||||
void squash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid);
|
||||
|
||||
/** Ticks the fetch stage, processing all inputs signals and fetching
|
||||
|
@ -346,16 +349,12 @@ class DefaultFetch
|
|||
/** Per-thread fetch PC. */
|
||||
Addr PC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread fetch micro PC. */
|
||||
Addr microPC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread next PC. */
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread next Next PC.
|
||||
* This is not a real register but is used for
|
||||
* architectures that use a branch-delay slot.
|
||||
* (such as MIPS or Sparc)
|
||||
*/
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
|
||||
/** Memory request used to access cache. */
|
||||
RequestPtr memReq[Impl::MaxThreads];
|
||||
|
||||
|
|
|
@ -312,7 +312,7 @@ DefaultFetch<Impl>::initStage()
|
|||
for (int tid = 0; tid < numThreads; tid++) {
|
||||
PC[tid] = cpu->readPC(tid);
|
||||
nextPC[tid] = cpu->readNextPC(tid);
|
||||
nextNPC[tid] = cpu->readNextNPC(tid);
|
||||
microPC[tid] = cpu->readMicroPC(tid);
|
||||
}
|
||||
|
||||
for (int tid=0; tid < numThreads; tid++) {
|
||||
|
@ -439,11 +439,7 @@ DefaultFetch<Impl>::takeOverFrom()
|
|||
stalls[i].commit = 0;
|
||||
PC[i] = cpu->readPC(i);
|
||||
nextPC[i] = cpu->readNextPC(i);
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextNPC[i] = cpu->readNextNPC(i);
|
||||
#else
|
||||
nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
microPC[i] = cpu->readMicroPC(i);
|
||||
fetchStatus[i] = Running;
|
||||
}
|
||||
numInst = 0;
|
||||
|
@ -493,7 +489,7 @@ DefaultFetch<Impl>::switchToInactive()
|
|||
template <class Impl>
|
||||
bool
|
||||
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
||||
Addr &next_NPC)
|
||||
Addr &next_NPC, Addr &next_MicroPC)
|
||||
{
|
||||
// Do branch prediction check here.
|
||||
// A bit of a misnomer...next_PC is actually the current PC until
|
||||
|
@ -501,13 +497,22 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
|||
bool predict_taken;
|
||||
|
||||
if (!inst->isControl()) {
|
||||
next_PC = next_NPC;
|
||||
next_NPC = next_NPC + instSize;
|
||||
inst->setPredTarg(next_PC, next_NPC);
|
||||
if (inst->isMicroOp() && !inst->isLastMicroOp()) {
|
||||
next_MicroPC++;
|
||||
} else {
|
||||
next_PC = next_NPC;
|
||||
next_NPC = next_NPC + instSize;
|
||||
next_MicroPC = 0;
|
||||
}
|
||||
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
||||
inst->setPredTaken(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
//Assume for now that all control flow is to a different macroop which
|
||||
//would reset the micro pc to 0.
|
||||
next_MicroPC = 0;
|
||||
|
||||
int tid = inst->threadNumber;
|
||||
Addr pred_PC = next_PC;
|
||||
predict_taken = branchPred.predict(inst, pred_PC, tid);
|
||||
|
@ -534,7 +539,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
|||
#endif
|
||||
/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
|
||||
tid, next_PC, next_NPC);*/
|
||||
inst->setPredTarg(next_PC, next_NPC);
|
||||
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
||||
inst->setPredTaken(predict_taken);
|
||||
|
||||
++fetchedBranches;
|
||||
|
@ -658,14 +663,14 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
|||
template <class Impl>
|
||||
inline void
|
||||
DefaultFetch<Impl>::doSquash(const Addr &new_PC,
|
||||
const Addr &new_NPC, unsigned tid)
|
||||
const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
|
||||
tid, new_PC, new_NPC);
|
||||
|
||||
PC[tid] = new_PC;
|
||||
nextPC[tid] = new_NPC;
|
||||
nextNPC[tid] = new_NPC + instSize;
|
||||
microPC[tid] = new_microPC;
|
||||
|
||||
// Clear the icache miss if it's outstanding.
|
||||
if (fetchStatus[tid] == IcacheWaitResponse) {
|
||||
|
@ -693,12 +698,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
|
|||
template<class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
|
||||
const InstSeqNum &seq_num,
|
||||
unsigned tid)
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
|
||||
|
||||
doSquash(new_PC, new_NPC, tid);
|
||||
doSquash(new_PC, new_NPC, new_MicroPC, tid);
|
||||
|
||||
// Tell the CPU to remove any instructions that are in flight between
|
||||
// fetch and decode.
|
||||
|
@ -774,11 +779,12 @@ DefaultFetch<Impl>::updateFetchStatus()
|
|||
template <class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
|
||||
|
||||
doSquash(new_PC, new_NPC, tid);
|
||||
doSquash(new_PC, new_NPC, new_MicroPC, tid);
|
||||
|
||||
// Tell the CPU to remove any instructions that are not in the ROB.
|
||||
cpu->removeInstsNotInROB(tid);
|
||||
|
@ -893,6 +899,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
|
|||
// In any case, squash.
|
||||
squash(fromCommit->commitInfo[tid].nextPC,
|
||||
fromCommit->commitInfo[tid].nextNPC,
|
||||
fromCommit->commitInfo[tid].nextMicroPC,
|
||||
fromCommit->commitInfo[tid].doneSeqNum,
|
||||
tid);
|
||||
|
||||
|
@ -948,6 +955,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
|
|||
// Squash unless we're already squashing
|
||||
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
|
||||
fromDecode->decodeInfo[tid].nextNPC,
|
||||
fromDecode->decodeInfo[tid].nextMicroPC,
|
||||
fromDecode->decodeInfo[tid].doneSeqNum,
|
||||
tid);
|
||||
|
||||
|
@ -1002,9 +1010,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
|
||||
|
||||
// The current PC.
|
||||
Addr &fetch_PC = PC[tid];
|
||||
|
||||
Addr &fetch_NPC = nextPC[tid];
|
||||
Addr fetch_PC = PC[tid];
|
||||
Addr fetch_NPC = nextPC[tid];
|
||||
Addr fetch_MicroPC = microPC[tid];
|
||||
|
||||
// Fault code for memory access.
|
||||
Fault fault = NoFault;
|
||||
|
@ -1063,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
|
||||
Addr next_PC = fetch_PC;
|
||||
Addr next_NPC = fetch_NPC;
|
||||
Addr next_MicroPC = fetch_MicroPC;
|
||||
|
||||
InstSeqNum inst_seq;
|
||||
MachInst inst;
|
||||
|
@ -1070,6 +1079,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// @todo: Fix this hack.
|
||||
unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
|
||||
|
||||
StaticInstPtr staticInst = NULL;
|
||||
StaticInstPtr macroop = NULL;
|
||||
|
||||
if (fault == NoFault) {
|
||||
// If the read of the first instruction was successful, then grab the
|
||||
// instructions from the rest of the cache line and put them into the
|
||||
|
@ -1104,19 +1116,29 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// Make sure this is a valid index.
|
||||
assert(offset <= cacheBlkSize - instSize);
|
||||
|
||||
// Get the instruction from the array of the cache line.
|
||||
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
(&cacheData[tid][offset]));
|
||||
if (!macroop) {
|
||||
// Get the instruction from the array of the cache line.
|
||||
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
(&cacheData[tid][offset]));
|
||||
|
||||
predecoder.setTC(cpu->thread[tid]->getTC());
|
||||
predecoder.moreBytes(fetch_PC, 0, inst);
|
||||
predecoder.setTC(cpu->thread[tid]->getTC());
|
||||
predecoder.moreBytes(fetch_PC, 0, inst);
|
||||
|
||||
ext_inst = predecoder.getExtMachInst();
|
||||
ext_inst = predecoder.getExtMachInst();
|
||||
staticInst = StaticInstPtr(ext_inst);
|
||||
if (staticInst->isMacroOp())
|
||||
macroop = staticInst;
|
||||
}
|
||||
if (macroop) {
|
||||
staticInst = macroop->fetchMicroOp(fetch_MicroPC);
|
||||
if (staticInst->isLastMicroOp())
|
||||
macroop = NULL;
|
||||
}
|
||||
|
||||
// Create a new DynInst from the instruction fetched.
|
||||
DynInstPtr instruction = new DynInst(ext_inst,
|
||||
fetch_PC, fetch_NPC,
|
||||
next_PC, next_NPC,
|
||||
DynInstPtr instruction = new DynInst(staticInst,
|
||||
fetch_PC, fetch_NPC, fetch_MicroPC,
|
||||
next_PC, next_NPC, next_MicroPC,
|
||||
inst_seq, cpu);
|
||||
instruction->setTid(tid);
|
||||
|
||||
|
@ -1139,7 +1161,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
instruction->readPC());
|
||||
|
||||
///FIXME This needs to be more robust in dealing with delay slots
|
||||
lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
|
||||
lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
|
||||
predicted_branch |= (next_PC != fetch_NPC);
|
||||
|
||||
// Add instruction to the CPU's list of instructions.
|
||||
|
@ -1157,6 +1179,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// Move to the next instruction, unless we have a branch.
|
||||
fetch_PC = next_PC;
|
||||
fetch_NPC = next_NPC;
|
||||
fetch_MicroPC = next_MicroPC;
|
||||
|
||||
if (instruction->isQuiesce()) {
|
||||
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
|
||||
|
@ -1167,7 +1190,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
break;
|
||||
}
|
||||
|
||||
offset += instSize;
|
||||
if (!macroop)
|
||||
offset += instSize;
|
||||
}
|
||||
|
||||
if (offset >= cacheBlkSize) {
|
||||
|
@ -1191,7 +1215,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
if (fault == NoFault) {
|
||||
PC[tid] = next_PC;
|
||||
nextPC[tid] = next_NPC;
|
||||
nextNPC[tid] = next_NPC + instSize;
|
||||
microPC[tid] = next_MicroPC;
|
||||
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
|
||||
} else {
|
||||
// We shouldn't be in an icache miss and also have a fault (an ITB
|
||||
|
@ -1210,8 +1234,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// We will use a nop in order to carry the fault.
|
||||
ext_inst = TheISA::NoopMachInst;
|
||||
|
||||
StaticInstPtr staticInst = new StaticInst(ext_inst);
|
||||
// Create a new DynInst from the dummy nop.
|
||||
DynInstPtr instruction = new DynInst(ext_inst,
|
||||
DynInstPtr instruction = new DynInst(staticInst,
|
||||
fetch_PC, fetch_NPC,
|
||||
next_PC, next_NPC,
|
||||
inst_seq, cpu);
|
||||
|
|
|
@ -454,6 +454,7 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
|
|||
#endif
|
||||
toCommit->nextPC[tid] = inst->readNextPC();
|
||||
toCommit->nextNPC[tid] = inst->readNextNPC();
|
||||
toCommit->nextMicroPC[tid] = inst->readNextMicroPC();
|
||||
|
||||
toCommit->includeSquashInst[tid] = false;
|
||||
|
||||
|
|
|
@ -963,6 +963,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
|
|||
// Floating point and Miscellaneous registers need their indexes
|
||||
// adjusted to account for the expanded number of flattened int regs.
|
||||
flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
|
||||
DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg);
|
||||
}
|
||||
|
||||
inst->flattenSrcReg(src_idx, flat_src_reg);
|
||||
|
@ -979,9 +980,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
|
|||
|
||||
// See if the register is ready or not.
|
||||
if (scoreboard->getReg(renamed_reg) == true) {
|
||||
DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
|
||||
DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg);
|
||||
|
||||
inst->markSrcRegReady(src_idx);
|
||||
} else {
|
||||
DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg);
|
||||
}
|
||||
|
||||
++renameRenameLookups;
|
||||
|
@ -1008,6 +1011,7 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
|
|||
// Floating point and Miscellaneous registers need their indexes
|
||||
// adjusted to account for the expanded number of flattened int regs.
|
||||
flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
|
||||
DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg);
|
||||
}
|
||||
|
||||
inst->flattenDestReg(dest_idx, flat_dest_reg);
|
||||
|
|
|
@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst<Impl>
|
|||
|
||||
public:
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
|
||||
SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
SparcDynInst(StaticInstPtr &_staticInst);
|
||||
|
|
|
@ -31,10 +31,23 @@
|
|||
#include "cpu/o3/sparc/dyn_inst.hh"
|
||||
|
||||
template <class Impl>
|
||||
SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
|
||||
SparcDynInst<Impl>::SparcDynInst(StaticInstPtr staticInst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
|
||||
: BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue