Merge zizzer.eecs.umich.edu:/z/m5/Bitkeeper/newmem
into zizzer.eecs.umich.edu:/.automount/wexford/x/gblack/m5/newmem-o3-spec --HG-- extra : convert_revision : d18cce378fe3390c6e708945b9ea7c76c2d20a81
This commit is contained in:
commit
dde2b11ae6
25 changed files with 560 additions and 605 deletions
|
@ -187,7 +187,7 @@ def operands {{
|
|||
'Hver': ('ControlReg', 'udw', 'MISCREG_HVER', None, 74),
|
||||
'StrandStsReg': ('ControlReg', 'udw', 'MISCREG_STRAND_STS_REG', None, 75),
|
||||
|
||||
'Fsr': ('ControlReg', 'udw', 'MISCREG_FSR', None, 80),
|
||||
'Fsr': ('ControlReg', 'udw', 'MISCREG_FSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80),
|
||||
# Mem gets a large number so it's always last
|
||||
'Mem': ('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ namespace SparcISA
|
|||
|
||||
typedef int RegContextVal;
|
||||
|
||||
typedef uint8_t RegIndex;
|
||||
typedef uint16_t RegIndex;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** PC of this instruction. */
|
||||
Addr PC;
|
||||
|
||||
/** Micro PC of this instruction. */
|
||||
Addr microPC;
|
||||
|
||||
protected:
|
||||
/** Next non-speculative PC. It is not filled in at fetch, but rather
|
||||
* once the target of the branch is truly known (either decode or
|
||||
|
@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Next non-speculative NPC. Target PC for Mips or Sparc. */
|
||||
Addr nextNPC;
|
||||
|
||||
/** Next non-speculative micro PC. */
|
||||
Addr nextMicroPC;
|
||||
|
||||
/** Predicted next PC. */
|
||||
Addr predPC;
|
||||
|
||||
/** Predicted next NPC. */
|
||||
Addr predNPC;
|
||||
|
||||
/** Predicted next microPC */
|
||||
Addr predMicroPC;
|
||||
|
||||
/** If this is a branch that was predicted taken */
|
||||
bool predTaken;
|
||||
|
||||
|
@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
{
|
||||
_flatDestRegIdx[idx] = flattened_dest;
|
||||
}
|
||||
/** BaseDynInst constructor given a StaticInstPtr and explicit PC/prediction state.
|
||||
* @param staticInst A StaticInstPtr to the underlying instruction.
|
||||
* @param PC The PC of the instruction.
|
||||
* @param pred_PC The predicted next PC.
|
||||
* @param pred_NPC The predicted next NPC.
|
||||
* @param seq_num The sequence number of the instruction.
|
||||
* @param cpu Pointer to the instruction's CPU.
|
||||
*/
|
||||
BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction.
|
||||
* @param inst The binary instruction.
|
||||
|
@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
* @param seq_num The sequence number of the instruction.
|
||||
* @param cpu Pointer to the instruction's CPU.
|
||||
*/
|
||||
BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a StaticInst pointer.
|
||||
|
@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
#endif
|
||||
}
|
||||
|
||||
Addr readNextMicroPC()
|
||||
{
|
||||
return nextMicroPC;
|
||||
}
|
||||
|
||||
/** Set the predicted target of this current instruction. */
|
||||
void setPredTarg(Addr predicted_PC, Addr predicted_NPC)
|
||||
void setPredTarg(Addr predicted_PC, Addr predicted_NPC,
|
||||
Addr predicted_MicroPC)
|
||||
{
|
||||
predPC = predicted_PC;
|
||||
predNPC = predicted_NPC;
|
||||
predMicroPC = predicted_MicroPC;
|
||||
}
|
||||
|
||||
/** Returns the predicted PC immediately after the branch. */
|
||||
|
@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Returns the predicted PC two instructions after the branch */
|
||||
Addr readPredNPC() { return predNPC; }
|
||||
|
||||
/** Returns the predicted micro PC after the branch */
|
||||
Addr readPredMicroPC() { return predMicroPC; }
|
||||
|
||||
/** Returns whether the instruction was predicted taken or not. */
|
||||
bool readPredTaken()
|
||||
{
|
||||
|
@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
bool mispredicted()
|
||||
{
|
||||
return readPredPC() != readNextPC() ||
|
||||
readPredNPC() != readNextNPC();
|
||||
readPredNPC() != readNextNPC() ||
|
||||
readPredMicroPC() != readNextMicroPC();
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
bool isQuiesce() const { return staticInst->isQuiesce(); }
|
||||
bool isIprAccess() const { return staticInst->isIprAccess(); }
|
||||
bool isUnverifiable() const { return staticInst->isUnverifiable(); }
|
||||
bool isMacroOp() const { return staticInst->isMacroOp(); }
|
||||
bool isMicroOp() const { return staticInst->isMicroOp(); }
|
||||
bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
|
||||
bool isLastMicroOp() const { return staticInst->isLastMicroOp(); }
|
||||
bool isFirstMicroOp() const { return staticInst->isFirstMicroOp(); }
|
||||
bool isMicroBranch() const { return staticInst->isMicroBranch(); }
|
||||
|
||||
/** Temporarily sets this instruction as a serialize before instruction. */
|
||||
void setSerializeBefore() { status.set(SerializeBefore); }
|
||||
|
@ -700,16 +737,26 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
|||
/** Read the PC of this instruction. */
|
||||
const Addr readPC() const { return PC; }
|
||||
|
||||
/** Read the micro PC of this instruction. */
|
||||
const Addr readMicroPC() const { return microPC; }
|
||||
|
||||
/** Set the next PC of this instruction (its actual target). */
|
||||
void setNextPC(uint64_t val)
|
||||
void setNextPC(Addr val)
|
||||
{
|
||||
nextPC = val;
|
||||
}
|
||||
|
||||
/** Set the next NPC of this instruction (the target in Mips or Sparc).*/
|
||||
void setNextNPC(uint64_t val)
|
||||
void setNextNPC(Addr val)
|
||||
{
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextNPC = val;
|
||||
#endif
|
||||
}
|
||||
|
||||
void setNextMicroPC(Addr val)
|
||||
{
|
||||
nextMicroPC = val;
|
||||
}
|
||||
|
||||
/** Sets the ASID. */
|
||||
|
|
|
@ -62,19 +62,66 @@ my_hash_t thishash;
|
|||
#endif
|
||||
|
||||
template <class Impl>
|
||||
BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst,
|
||||
BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
|
||||
Addr inst_PC, Addr inst_NPC,
|
||||
Addr inst_MicroPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu)
|
||||
: staticInst(machInst), traceData(NULL), cpu(cpu)
|
||||
: staticInst(_staticInst), traceData(NULL), cpu(cpu)
|
||||
{
|
||||
seqNum = seq_num;
|
||||
|
||||
bool nextIsMicro =
|
||||
staticInst->isMicroOp() && !staticInst->isLastMicroOp();
|
||||
|
||||
PC = inst_PC;
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
microPC = inst_MicroPC;
|
||||
if (nextIsMicro) {
|
||||
nextPC = inst_PC;
|
||||
nextNPC = inst_NPC;
|
||||
nextMicroPC = microPC + 1;
|
||||
} else {
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
nextMicroPC = 0;
|
||||
}
|
||||
predPC = pred_PC;
|
||||
predNPC = pred_NPC;
|
||||
predMicroPC = pred_MicroPC;
|
||||
predTaken = false;
|
||||
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst inst,
|
||||
Addr inst_PC, Addr inst_NPC,
|
||||
Addr inst_MicroPC,
|
||||
Addr pred_PC, Addr pred_NPC,
|
||||
Addr pred_MicroPC,
|
||||
InstSeqNum seq_num, ImplCPU *cpu)
|
||||
: staticInst(inst), traceData(NULL), cpu(cpu)
|
||||
{
|
||||
seqNum = seq_num;
|
||||
|
||||
bool nextIsMicro =
|
||||
staticInst->isMicroOp() && !staticInst->isLastMicroOp();
|
||||
|
||||
PC = inst_PC;
|
||||
microPC = inst_MicroPC;
|
||||
if (nextIsMicro) {
|
||||
nextPC = inst_PC;
|
||||
nextNPC = inst_NPC;
|
||||
nextMicroPC = microPC + 1;
|
||||
} else {
|
||||
nextPC = inst_NPC;
|
||||
nextNPC = nextPC + sizeof(TheISA::MachInst);
|
||||
nextMicroPC = 0;
|
||||
}
|
||||
predPC = pred_PC;
|
||||
predNPC = pred_NPC;
|
||||
predMicroPC = pred_MicroPC;
|
||||
predTaken = false;
|
||||
|
||||
initVars();
|
||||
|
|
|
@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst<Impl>
|
|||
|
||||
public:
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
|
||||
Addr Pred_PC, Addr Pred_NPC,
|
||||
AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
|
|
|
@ -31,10 +31,25 @@
|
|||
#include "cpu/o3/alpha/dyn_inst.hh"
|
||||
|
||||
template <class Impl>
|
||||
AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
|
||||
AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr staticInst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC,
|
||||
Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
|
||||
: BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC,
|
||||
Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
|
|
@ -87,10 +87,10 @@ struct DefaultIEWDefaultCommit {
|
|||
bool squash[Impl::MaxThreads];
|
||||
bool branchMispredict[Impl::MaxThreads];
|
||||
bool branchTaken[Impl::MaxThreads];
|
||||
bool squashDelaySlot[Impl::MaxThreads];
|
||||
uint64_t mispredPC[Impl::MaxThreads];
|
||||
uint64_t nextPC[Impl::MaxThreads];
|
||||
uint64_t nextNPC[Impl::MaxThreads];
|
||||
Addr mispredPC[Impl::MaxThreads];
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
Addr nextMicroPC[Impl::MaxThreads];
|
||||
InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
||||
|
||||
bool includeSquashInst[Impl::MaxThreads];
|
||||
|
@ -114,15 +114,15 @@ struct TimeBufStruct {
|
|||
uint64_t branchAddr;
|
||||
|
||||
InstSeqNum doneSeqNum;
|
||||
InstSeqNum bdelayDoneSeqNum;
|
||||
|
||||
// @todo: Might want to package this kind of branch stuff into a single
|
||||
// struct as it is used pretty frequently.
|
||||
bool branchMispredict;
|
||||
bool branchTaken;
|
||||
uint64_t mispredPC;
|
||||
uint64_t nextPC;
|
||||
uint64_t nextNPC;
|
||||
Addr mispredPC;
|
||||
Addr nextPC;
|
||||
Addr nextNPC;
|
||||
Addr nextMicroPC;
|
||||
|
||||
unsigned branchCount;
|
||||
};
|
||||
|
@ -160,18 +160,16 @@ struct TimeBufStruct {
|
|||
|
||||
bool branchMispredict;
|
||||
bool branchTaken;
|
||||
uint64_t mispredPC;
|
||||
uint64_t nextPC;
|
||||
uint64_t nextNPC;
|
||||
Addr mispredPC;
|
||||
Addr nextPC;
|
||||
Addr nextNPC;
|
||||
Addr nextMicroPC;
|
||||
|
||||
// Represents the instruction that has either been retired or
|
||||
// squashed. Similar to having a single bus that broadcasts the
|
||||
// retired or squashed sequence number.
|
||||
InstSeqNum doneSeqNum;
|
||||
|
||||
InstSeqNum bdelayDoneSeqNum;
|
||||
bool squashDelaySlot;
|
||||
|
||||
//Just in case we want to do a commit/squash on a cycle
|
||||
//(necessary for multiple ROBs?)
|
||||
bool commitInsts;
|
||||
|
|
|
@ -279,25 +279,37 @@ class DefaultCommit
|
|||
/** Returns the PC of the head instruction of the ROB.
|
||||
* @todo: Probably remove this function as it returns only thread 0.
|
||||
*/
|
||||
uint64_t readPC() { return PC[0]; }
|
||||
Addr readPC() { return PC[0]; }
|
||||
|
||||
/** Returns the PC of a specific thread. */
|
||||
uint64_t readPC(unsigned tid) { return PC[tid]; }
|
||||
Addr readPC(unsigned tid) { return PC[tid]; }
|
||||
|
||||
/** Sets the PC of a specific thread. */
|
||||
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
|
||||
void setPC(Addr val, unsigned tid) { PC[tid] = val; }
|
||||
|
||||
/** Reads the micro PC of a specific thread. */
|
||||
Addr readMicroPC(unsigned tid) { return microPC[tid]; }
|
||||
|
||||
/** Sets the micro PC of a specific thread */
|
||||
void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; }
|
||||
|
||||
/** Reads the next PC of a specific thread. */
|
||||
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
|
||||
Addr readNextPC(unsigned tid) { return nextPC[tid]; }
|
||||
|
||||
/** Sets the next PC of a specific thread. */
|
||||
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
|
||||
void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; }
|
||||
|
||||
/** Reads the next NPC of a specific thread. */
|
||||
uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
|
||||
Addr readNextNPC(unsigned tid) { return nextNPC[tid]; }
|
||||
|
||||
/** Sets the next NPC of a specific thread. */
|
||||
void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
|
||||
void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; }
|
||||
|
||||
/** Reads the next micro PC of a specific thread. */
|
||||
Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; }
|
||||
|
||||
/** Sets the next micro PC of a specific thread. */
|
||||
void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; }
|
||||
|
||||
private:
|
||||
/** Time buffer interface. */
|
||||
|
@ -402,12 +414,20 @@ class DefaultCommit
|
|||
*/
|
||||
Addr PC[Impl::MaxThreads];
|
||||
|
||||
/** The commit micro PC of each thread. Refers to the instruction that
|
||||
* is currently being processed/committed.
|
||||
*/
|
||||
Addr microPC[Impl::MaxThreads];
|
||||
|
||||
/** The next PC of each thread. */
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
|
||||
/** The next NPC of each thread. */
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
|
||||
/** The next micro PC of each thread. */
|
||||
Addr nextMicroPC[Impl::MaxThreads];
|
||||
|
||||
/** The sequence number of the youngest valid instruction in the ROB. */
|
||||
InstSeqNum youngestSeqNum[Impl::MaxThreads];
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
|
|||
committedStores[i] = false;
|
||||
trapSquash[i] = false;
|
||||
tcSquash[i] = false;
|
||||
PC[i] = nextPC[i] = nextNPC[i] = 0;
|
||||
microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0;
|
||||
}
|
||||
#if FULL_SYSTEM
|
||||
interrupt = NoFault;
|
||||
|
@ -508,6 +508,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
|
|||
|
||||
toIEW->commitInfo[tid].nextPC = PC[tid];
|
||||
toIEW->commitInfo[tid].nextNPC = nextPC[tid];
|
||||
toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid];
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -741,38 +742,15 @@ DefaultCommit<Impl>::commit()
|
|||
// then use one older sequence number.
|
||||
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
InstSeqNum bdelay_done_seq_num = squashed_inst;
|
||||
bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
|
||||
bool branchMispredict = fromIEW->branchMispredict[tid];
|
||||
|
||||
// Squashing/not squashing the branch delay slot only makes
|
||||
// sense when you're squashing from a branch, ie from a branch
|
||||
// mispredict.
|
||||
if (branchMispredict && !squash_bdelay_slot) {
|
||||
bdelay_done_seq_num++;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fromIEW->includeSquashInst[tid] == true) {
|
||||
squashed_inst--;
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
bdelay_done_seq_num--;
|
||||
#endif
|
||||
}
|
||||
|
||||
// All younger instructions will be squashed. Set the sequence
|
||||
// number as the youngest instruction in the ROB.
|
||||
youngestSeqNum[tid] = squashed_inst;
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
rob->squash(bdelay_done_seq_num, tid);
|
||||
toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
|
||||
toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
|
||||
#else
|
||||
rob->squash(squashed_inst, tid);
|
||||
toIEW->commitInfo[tid].squashDelaySlot = true;
|
||||
#endif
|
||||
changedROBNumEntries[tid] = true;
|
||||
|
||||
toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
|
||||
|
@ -791,6 +769,7 @@ DefaultCommit<Impl>::commit()
|
|||
|
||||
toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
|
||||
toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
|
||||
toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid];
|
||||
|
||||
toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
|
||||
|
||||
|
@ -809,10 +788,6 @@ DefaultCommit<Impl>::commit()
|
|||
|
||||
// Try to commit any instructions.
|
||||
commitInsts();
|
||||
} else {
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
skidInsert();
|
||||
#endif
|
||||
}
|
||||
|
||||
//Check for any activity
|
||||
|
@ -904,6 +879,7 @@ DefaultCommit<Impl>::commitInsts()
|
|||
PC[tid] = head_inst->readPC();
|
||||
nextPC[tid] = head_inst->readNextPC();
|
||||
nextNPC[tid] = head_inst->readNextNPC();
|
||||
nextMicroPC[tid] = head_inst->readNextMicroPC();
|
||||
|
||||
// Increment the total number of non-speculative instructions
|
||||
// executed.
|
||||
|
@ -932,12 +908,10 @@ DefaultCommit<Impl>::commitInsts()
|
|||
}
|
||||
|
||||
PC[tid] = nextPC[tid];
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextPC[tid] = nextNPC[tid];
|
||||
nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
|
||||
#else
|
||||
nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
microPC[tid] = nextMicroPC[tid];
|
||||
nextMicroPC[tid] = microPC[tid] + 1;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
int count = 0;
|
||||
|
@ -1164,37 +1138,13 @@ DefaultCommit<Impl>::getInsts()
|
|||
{
|
||||
DPRINTF(Commit, "Getting instructions from Rename stage.\n");
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
// Read any renamed instructions and place them into the ROB.
|
||||
int insts_to_process = std::min((int)renameWidth,
|
||||
(int)(fromRename->size + skidBuffer.size()));
|
||||
int rename_idx = 0;
|
||||
|
||||
DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
|
||||
"SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
|
||||
skidBuffer.size());
|
||||
#else
|
||||
// Read any renamed instructions and place them into the ROB.
|
||||
int insts_to_process = std::min((int)renameWidth, fromRename->size);
|
||||
#endif
|
||||
|
||||
|
||||
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
|
||||
DynInstPtr inst;
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
// Get insts from skidBuffer or from Rename
|
||||
if (skidBuffer.size() > 0) {
|
||||
DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
|
||||
inst = skidBuffer.front();
|
||||
skidBuffer.pop();
|
||||
} else {
|
||||
DPRINTF(Commit, "Grabbing rename inst.\n");
|
||||
inst = fromRename->insts[rename_idx++];
|
||||
}
|
||||
#else
|
||||
inst = fromRename->insts[inst_num];
|
||||
#endif
|
||||
int tid = inst->threadNumber;
|
||||
|
||||
if (!inst->isSquashed() &&
|
||||
|
@ -1216,30 +1166,6 @@ DefaultCommit<Impl>::getInsts()
|
|||
inst->readPC(), inst->seqNum, tid);
|
||||
}
|
||||
}
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if (rename_idx < fromRename->size) {
|
||||
DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
|
||||
|
||||
for (;
|
||||
rename_idx < fromRename->size;
|
||||
rename_idx++) {
|
||||
DynInstPtr inst = fromRename->insts[rename_idx];
|
||||
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
|
||||
"skidBuffer.\n", inst->readPC(), inst->seqNum,
|
||||
inst->threadNumber);
|
||||
skidBuffer.push(inst);
|
||||
} else {
|
||||
DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
|
||||
"squashed, skipping.\n",
|
||||
inst->readPC(), inst->seqNum, inst->threadNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
|
|
@ -696,7 +696,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
|
|||
|
||||
// Squash Throughout Pipeline
|
||||
InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
|
||||
fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
|
||||
fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid);
|
||||
decode.squash(tid);
|
||||
rename.squash(squash_seq_num, tid);
|
||||
iew.squash(tid);
|
||||
|
@ -1150,6 +1150,20 @@ FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
|
|||
commit.setPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readMicroPC(unsigned tid)
|
||||
{
|
||||
return commit.readMicroPC(tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::setMicroPC(Addr new_PC,unsigned tid)
|
||||
{
|
||||
commit.setMicroPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readNextPC(unsigned tid)
|
||||
|
@ -1178,6 +1192,20 @@ FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
|
|||
commit.setNextNPC(val, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readNextMicroPC(unsigned tid)
|
||||
{
|
||||
return commit.readNextMicroPC(tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::setNextMicroPC(Addr new_PC,unsigned tid)
|
||||
{
|
||||
commit.setNextMicroPC(new_PC, tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename FullO3CPU<Impl>::ListIt
|
||||
FullO3CPU<Impl>::addInst(DynInstPtr &inst)
|
||||
|
@ -1226,9 +1254,7 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
|
|||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
|
||||
bool squash_delay_slot,
|
||||
const InstSeqNum &delay_slot_seq_num)
|
||||
FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
|
||||
{
|
||||
DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
|
||||
" list.\n", tid);
|
||||
|
@ -1259,12 +1285,6 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
|
|||
while (inst_it != end_it) {
|
||||
assert(!instList.empty());
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if(!squash_delay_slot &&
|
||||
delay_slot_seq_num >= (*inst_it)->seqNum) {
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
squashInstIt(inst_it, tid);
|
||||
|
||||
inst_it--;
|
||||
|
|
|
@ -433,22 +433,34 @@ class FullO3CPU : public BaseO3CPU
|
|||
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
|
||||
|
||||
/** Reads the commit PC of a specific thread. */
|
||||
uint64_t readPC(unsigned tid);
|
||||
Addr readPC(unsigned tid);
|
||||
|
||||
/** Sets the commit PC of a specific thread. */
|
||||
void setPC(Addr new_PC, unsigned tid);
|
||||
|
||||
/** Reads the commit micro PC of a specific thread. */
|
||||
Addr readMicroPC(unsigned tid);
|
||||
|
||||
/** Sets the commit micro PC of a specific thread. */
|
||||
void setMicroPC(Addr new_microPC, unsigned tid);
|
||||
|
||||
/** Reads the next PC of a specific thread. */
|
||||
uint64_t readNextPC(unsigned tid);
|
||||
Addr readNextPC(unsigned tid);
|
||||
|
||||
/** Sets the next PC of a specific thread. */
|
||||
void setNextPC(uint64_t val, unsigned tid);
|
||||
void setNextPC(Addr val, unsigned tid);
|
||||
|
||||
/** Reads the next NPC of a specific thread. */
|
||||
uint64_t readNextNPC(unsigned tid);
|
||||
Addr readNextNPC(unsigned tid);
|
||||
|
||||
/** Sets the next NPC of a specific thread. */
|
||||
void setNextNPC(uint64_t val, unsigned tid);
|
||||
void setNextNPC(Addr val, unsigned tid);
|
||||
|
||||
/** Reads the commit next micro PC of a specific thread. */
|
||||
Addr readNextMicroPC(unsigned tid);
|
||||
|
||||
/** Sets the commit next micro PC of a specific thread. */
|
||||
void setNextMicroPC(Addr val, unsigned tid);
|
||||
|
||||
/** Function to add instruction onto the head of the list of the
|
||||
* instructions. Used when new instructions are fetched.
|
||||
|
@ -468,8 +480,7 @@ class FullO3CPU : public BaseO3CPU
|
|||
|
||||
/** Remove all instructions that are not currently in the ROB.
|
||||
* There's also an option to not squash delay slot instructions.*/
|
||||
void removeInstsNotInROB(unsigned tid, bool squash_delay_slot,
|
||||
const InstSeqNum &delay_slot_seq_num);
|
||||
void removeInstsNotInROB(unsigned tid);
|
||||
|
||||
/** Remove all instructions younger than the given sequence number. */
|
||||
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
|
||||
|
|
|
@ -49,8 +49,6 @@ DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, Params *params)
|
|||
stalls[i].rename = false;
|
||||
stalls[i].iew = false;
|
||||
stalls[i].commit = false;
|
||||
|
||||
squashAfterDelaySlot[i] = false;
|
||||
}
|
||||
|
||||
// @todo: Make into a parameter
|
||||
|
@ -275,20 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
|||
///explicitly for ISAs with delay slots.
|
||||
toFetch->decodeInfo[tid].nextNPC =
|
||||
inst->branchTarget() + sizeof(TheISA::MachInst);
|
||||
toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
|
||||
(inst->readNextPC() + sizeof(TheISA::MachInst));
|
||||
|
||||
toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
|
||||
squashAfterDelaySlot[tid] = false;
|
||||
|
||||
InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
|
||||
#else
|
||||
toFetch->decodeInfo[tid].branchTaken =
|
||||
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
|
||||
#endif
|
||||
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
#endif
|
||||
|
||||
// Might have to tell fetch to unblock.
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
|
@ -309,30 +303,10 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
|||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if (insts[tid].front()->seqNum <= squash_seq_num) {
|
||||
DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
|
||||
"instructions before delay slot [sn:%i]. %i insts"
|
||||
"left in decode.\n", tid, squash_seq_num,
|
||||
insts[tid].size());
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
|
||||
DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
|
||||
"instructions before delay slot [sn:%i]. %i insts"
|
||||
"left in decode.\n", tid, squash_seq_num,
|
||||
insts[tid].size());
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
|
@ -760,48 +734,13 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
|
|||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
#if !ISA_HAS_DELAY_SLOT
|
||||
squash(inst, inst->threadNumber);
|
||||
Addr target = inst->branchTarget();
|
||||
inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
|
||||
break;
|
||||
#else
|
||||
// If mispredicted as taken, then ignore delay slot
|
||||
// instruction... else keep delay slot and squash
|
||||
// after it is sent to rename
|
||||
if (inst->readPredTaken() && inst->isCondDelaySlot()) {
|
||||
DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
|
||||
"[sn:%i] PC %#x mispredicted as taken.\n", tid,
|
||||
inst->seqNum, inst->PC);
|
||||
bdelayDoneSeqNum[tid] = inst->seqNum;
|
||||
squash(inst, inst->threadNumber);
|
||||
Addr target = inst->branchTarget();
|
||||
inst->setPredTarg(target,
|
||||
target + sizeof(TheISA::MachInst));
|
||||
break;
|
||||
} else {
|
||||
DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
|
||||
"[sn:%i] PC %#x, will squash after delay slot "
|
||||
"inst. is sent to Rename\n",
|
||||
tid, inst->seqNum, inst->PC);
|
||||
bdelayDoneSeqNum[tid] = inst->seqNum + 1;
|
||||
squashAfterDelaySlot[tid] = true;
|
||||
squashInst[tid] = inst;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (squashAfterDelaySlot[tid]) {
|
||||
assert(!inst->isSquashed());
|
||||
squash(squashInst[tid], squashInst[tid]->threadNumber);
|
||||
Addr target = squashInst[tid]->branchTarget();
|
||||
squashInst[tid]->setPredTarg(target,
|
||||
target + sizeof(TheISA::MachInst));
|
||||
assert(!inst->isSquashed());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't process all instructions, then we will need to block
|
||||
|
|
|
@ -227,7 +227,7 @@ class DefaultFetch
|
|||
* @param next_NPC Used for ISAs which use delay slots.
|
||||
* @return Whether or not a branch was predicted as taken.
|
||||
*/
|
||||
bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
|
||||
bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC);
|
||||
|
||||
/**
|
||||
* Fetches the cache line that contains fetch_PC. Returns any
|
||||
|
@ -242,12 +242,14 @@ class DefaultFetch
|
|||
bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. */
|
||||
inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
|
||||
inline void doSquash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC, unsigned tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
||||
* remove any instructions between fetch and decode that should be squashed.
|
||||
*/
|
||||
void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid);
|
||||
|
||||
/** Checks if a thread is stalled. */
|
||||
|
@ -263,8 +265,8 @@ class DefaultFetch
|
|||
* squash should be the commit stage.
|
||||
*/
|
||||
void squash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const InstSeqNum &seq_num,
|
||||
bool squash_delay_slot, unsigned tid);
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid);
|
||||
|
||||
/** Ticks the fetch stage, processing all inputs signals and fetching
|
||||
* as many instructions as possible.
|
||||
|
@ -347,16 +349,12 @@ class DefaultFetch
|
|||
/** Per-thread fetch PC. */
|
||||
Addr PC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread fetch micro PC. */
|
||||
Addr microPC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread next PC. */
|
||||
Addr nextPC[Impl::MaxThreads];
|
||||
|
||||
/** Per-thread next Next PC.
|
||||
* This is not a real register but is used for
|
||||
* architectures that use a branch-delay slot.
|
||||
* (such as MIPS or Sparc)
|
||||
*/
|
||||
Addr nextNPC[Impl::MaxThreads];
|
||||
|
||||
/** Memory request used to access cache. */
|
||||
RequestPtr memReq[Impl::MaxThreads];
|
||||
|
||||
|
|
|
@ -312,7 +312,7 @@ DefaultFetch<Impl>::initStage()
|
|||
for (int tid = 0; tid < numThreads; tid++) {
|
||||
PC[tid] = cpu->readPC(tid);
|
||||
nextPC[tid] = cpu->readNextPC(tid);
|
||||
nextNPC[tid] = cpu->readNextNPC(tid);
|
||||
microPC[tid] = cpu->readMicroPC(tid);
|
||||
}
|
||||
|
||||
for (int tid=0; tid < numThreads; tid++) {
|
||||
|
@ -439,11 +439,7 @@ DefaultFetch<Impl>::takeOverFrom()
|
|||
stalls[i].commit = 0;
|
||||
PC[i] = cpu->readPC(i);
|
||||
nextPC[i] = cpu->readNextPC(i);
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
nextNPC[i] = cpu->readNextNPC(i);
|
||||
#else
|
||||
nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
microPC[i] = cpu->readMicroPC(i);
|
||||
fetchStatus[i] = Running;
|
||||
}
|
||||
numInst = 0;
|
||||
|
@ -493,7 +489,7 @@ DefaultFetch<Impl>::switchToInactive()
|
|||
template <class Impl>
|
||||
bool
|
||||
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
||||
Addr &next_NPC)
|
||||
Addr &next_NPC, Addr &next_MicroPC)
|
||||
{
|
||||
// Do branch prediction check here.
|
||||
// A bit of a misnomer...next_PC is actually the current PC until
|
||||
|
@ -501,13 +497,22 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
|||
bool predict_taken;
|
||||
|
||||
if (!inst->isControl()) {
|
||||
next_PC = next_NPC;
|
||||
next_NPC = next_NPC + instSize;
|
||||
inst->setPredTarg(next_PC, next_NPC);
|
||||
if (inst->isMicroOp() && !inst->isLastMicroOp()) {
|
||||
next_MicroPC++;
|
||||
} else {
|
||||
next_PC = next_NPC;
|
||||
next_NPC = next_NPC + instSize;
|
||||
next_MicroPC = 0;
|
||||
}
|
||||
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
||||
inst->setPredTaken(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
//Assume for now that all control flow is to a different macroop which
|
||||
//would reset the micro pc to 0.
|
||||
next_MicroPC = 0;
|
||||
|
||||
int tid = inst->threadNumber;
|
||||
Addr pred_PC = next_PC;
|
||||
predict_taken = branchPred.predict(inst, pred_PC, tid);
|
||||
|
@ -534,7 +539,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
|
|||
#endif
|
||||
/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
|
||||
tid, next_PC, next_NPC);*/
|
||||
inst->setPredTarg(next_PC, next_NPC);
|
||||
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
|
||||
inst->setPredTaken(predict_taken);
|
||||
|
||||
++fetchedBranches;
|
||||
|
@ -658,14 +663,14 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
|||
template <class Impl>
|
||||
inline void
|
||||
DefaultFetch<Impl>::doSquash(const Addr &new_PC,
|
||||
const Addr &new_NPC, unsigned tid)
|
||||
const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
|
||||
tid, new_PC, new_NPC);
|
||||
|
||||
PC[tid] = new_PC;
|
||||
nextPC[tid] = new_NPC;
|
||||
nextNPC[tid] = new_NPC + instSize;
|
||||
microPC[tid] = new_microPC;
|
||||
|
||||
// Clear the icache miss if it's outstanding.
|
||||
if (fetchStatus[tid] == IcacheWaitResponse) {
|
||||
|
@ -693,12 +698,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
|
|||
template<class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
|
||||
const InstSeqNum &seq_num,
|
||||
unsigned tid)
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
|
||||
|
||||
doSquash(new_PC, new_NPC, tid);
|
||||
doSquash(new_PC, new_NPC, new_MicroPC, tid);
|
||||
|
||||
// Tell the CPU to remove any instructions that are in flight between
|
||||
// fetch and decode.
|
||||
|
@ -774,20 +779,15 @@ DefaultFetch<Impl>::updateFetchStatus()
|
|||
template <class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
|
||||
const InstSeqNum &seq_num,
|
||||
bool squash_delay_slot, unsigned tid)
|
||||
const Addr &new_MicroPC,
|
||||
const InstSeqNum &seq_num, unsigned tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
|
||||
|
||||
doSquash(new_PC, new_NPC, tid);
|
||||
doSquash(new_PC, new_NPC, new_MicroPC, tid);
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
// Tell the CPU to remove any instructions that are not in the ROB.
|
||||
cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
|
||||
#else
|
||||
// Tell the CPU to remove any instructions that are not in the ROB.
|
||||
cpu->removeInstsNotInROB(tid, true, 0);
|
||||
#endif
|
||||
cpu->removeInstsNotInROB(tid);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -896,17 +896,11 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
|
|||
|
||||
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
|
||||
"from commit.\n",tid);
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
|
||||
#else
|
||||
InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
|
||||
#endif
|
||||
// In any case, squash.
|
||||
squash(fromCommit->commitInfo[tid].nextPC,
|
||||
fromCommit->commitInfo[tid].nextNPC,
|
||||
doneSeqNum,
|
||||
fromCommit->commitInfo[tid].squashDelaySlot,
|
||||
fromCommit->commitInfo[tid].nextMicroPC,
|
||||
fromCommit->commitInfo[tid].doneSeqNum,
|
||||
tid);
|
||||
|
||||
// Also check if there's a mispredict that happened.
|
||||
|
@ -955,18 +949,14 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
|
|||
|
||||
if (fetchStatus[tid] != Squashing) {
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
|
||||
#else
|
||||
InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
|
||||
#endif
|
||||
DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
|
||||
fromDecode->decodeInfo[tid].nextPC,
|
||||
fromDecode->decodeInfo[tid].nextNPC);
|
||||
// Squash unless we're already squashing
|
||||
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
|
||||
fromDecode->decodeInfo[tid].nextNPC,
|
||||
doneSeqNum,
|
||||
fromDecode->decodeInfo[tid].nextMicroPC,
|
||||
fromDecode->decodeInfo[tid].doneSeqNum,
|
||||
tid);
|
||||
|
||||
return true;
|
||||
|
@ -1020,9 +1010,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
|
||||
|
||||
// The current PC.
|
||||
Addr &fetch_PC = PC[tid];
|
||||
|
||||
Addr &fetch_NPC = nextPC[tid];
|
||||
Addr fetch_PC = PC[tid];
|
||||
Addr fetch_NPC = nextPC[tid];
|
||||
Addr fetch_MicroPC = microPC[tid];
|
||||
|
||||
// Fault code for memory access.
|
||||
Fault fault = NoFault;
|
||||
|
@ -1081,6 +1071,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
|
||||
Addr next_PC = fetch_PC;
|
||||
Addr next_NPC = fetch_NPC;
|
||||
Addr next_MicroPC = fetch_MicroPC;
|
||||
|
||||
InstSeqNum inst_seq;
|
||||
MachInst inst;
|
||||
|
@ -1088,6 +1079,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// @todo: Fix this hack.
|
||||
unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
|
||||
|
||||
StaticInstPtr staticInst = NULL;
|
||||
StaticInstPtr macroop = NULL;
|
||||
|
||||
if (fault == NoFault) {
|
||||
// If the read of the first instruction was successful, then grab the
|
||||
// instructions from the rest of the cache line and put them into the
|
||||
|
@ -1100,11 +1094,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// ended this fetch block.
|
||||
bool predicted_branch = false;
|
||||
|
||||
for (;
|
||||
offset < cacheBlkSize &&
|
||||
numInst < fetchWidth &&
|
||||
!predicted_branch;
|
||||
++numInst) {
|
||||
while (offset < cacheBlkSize &&
|
||||
numInst < fetchWidth &&
|
||||
!predicted_branch) {
|
||||
|
||||
// If we're branching after this instruction, quit fetching
|
||||
// from the same block then.
|
||||
|
@ -1115,91 +1107,103 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
fetch_PC, fetch_NPC);
|
||||
}
|
||||
|
||||
|
||||
// Get a sequence number.
|
||||
inst_seq = cpu->getAndIncrementInstSeq();
|
||||
|
||||
// Make sure this is a valid index.
|
||||
assert(offset <= cacheBlkSize - instSize);
|
||||
|
||||
// Get the instruction from the array of the cache line.
|
||||
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
(&cacheData[tid][offset]));
|
||||
if (!macroop) {
|
||||
// Get the instruction from the array of the cache line.
|
||||
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
(&cacheData[tid][offset]));
|
||||
|
||||
predecoder.setTC(cpu->thread[tid]->getTC());
|
||||
predecoder.moreBytes(fetch_PC, 0, inst);
|
||||
predecoder.setTC(cpu->thread[tid]->getTC());
|
||||
predecoder.moreBytes(fetch_PC, 0, inst);
|
||||
|
||||
ext_inst = predecoder.getExtMachInst();
|
||||
|
||||
// Create a new DynInst from the instruction fetched.
|
||||
DynInstPtr instruction = new DynInst(ext_inst,
|
||||
fetch_PC, fetch_NPC,
|
||||
next_PC, next_NPC,
|
||||
inst_seq, cpu);
|
||||
instruction->setTid(tid);
|
||||
|
||||
instruction->setASID(tid);
|
||||
|
||||
instruction->setThreadState(cpu->thread[tid]);
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
|
||||
"[sn:%lli]\n",
|
||||
tid, instruction->readPC(), inst_seq);
|
||||
|
||||
//DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
|
||||
tid, instruction->staticInst->disassemble(fetch_PC));
|
||||
|
||||
instruction->traceData =
|
||||
Trace::getInstRecord(curTick, cpu->tcBase(tid),
|
||||
instruction->staticInst,
|
||||
instruction->readPC());
|
||||
|
||||
///FIXME This needs to be more robust in dealing with delay slots
|
||||
#if !ISA_HAS_DELAY_SLOT
|
||||
// predicted_branch |=
|
||||
#endif
|
||||
lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
|
||||
predicted_branch |= (next_PC != fetch_NPC);
|
||||
|
||||
// Add instruction to the CPU's list of instructions.
|
||||
instruction->setInstListIt(cpu->addInst(instruction));
|
||||
|
||||
// Write the instruction to the first slot in the queue
|
||||
// that heads to decode.
|
||||
toDecode->insts[numInst] = instruction;
|
||||
|
||||
toDecode->size++;
|
||||
|
||||
// Increment stat of fetched instructions.
|
||||
++fetchedInsts;
|
||||
|
||||
// Move to the next instruction, unless we have a branch.
|
||||
fetch_PC = next_PC;
|
||||
fetch_NPC = next_NPC;
|
||||
|
||||
if (instruction->isQuiesce()) {
|
||||
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
|
||||
curTick);
|
||||
fetchStatus[tid] = QuiescePending;
|
||||
++numInst;
|
||||
status_change = true;
|
||||
break;
|
||||
ext_inst = predecoder.getExtMachInst();
|
||||
staticInst = StaticInstPtr(ext_inst);
|
||||
if (staticInst->isMacroOp())
|
||||
macroop = staticInst;
|
||||
}
|
||||
do {
|
||||
if (macroop) {
|
||||
staticInst = macroop->fetchMicroOp(fetch_MicroPC);
|
||||
if (staticInst->isLastMicroOp())
|
||||
macroop = NULL;
|
||||
}
|
||||
|
||||
// Get a sequence number.
|
||||
inst_seq = cpu->getAndIncrementInstSeq();
|
||||
|
||||
// Create a new DynInst from the instruction fetched.
|
||||
DynInstPtr instruction = new DynInst(staticInst,
|
||||
fetch_PC, fetch_NPC, fetch_MicroPC,
|
||||
next_PC, next_NPC, next_MicroPC,
|
||||
inst_seq, cpu);
|
||||
instruction->setTid(tid);
|
||||
|
||||
instruction->setASID(tid);
|
||||
|
||||
instruction->setThreadState(cpu->thread[tid]);
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
|
||||
"[sn:%lli]\n",
|
||||
tid, instruction->readPC(), inst_seq);
|
||||
|
||||
//DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
|
||||
tid, instruction->staticInst->disassemble(fetch_PC));
|
||||
|
||||
instruction->traceData =
|
||||
Trace::getInstRecord(curTick, cpu->tcBase(tid),
|
||||
instruction->staticInst,
|
||||
instruction->readPC());
|
||||
|
||||
///FIXME This needs to be more robust in dealing with delay slots
|
||||
predicted_branch |=
|
||||
lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
|
||||
|
||||
// Add instruction to the CPU's list of instructions.
|
||||
instruction->setInstListIt(cpu->addInst(instruction));
|
||||
|
||||
// Write the instruction to the first slot in the queue
|
||||
// that heads to decode.
|
||||
toDecode->insts[numInst] = instruction;
|
||||
|
||||
toDecode->size++;
|
||||
|
||||
// Increment stat of fetched instructions.
|
||||
++fetchedInsts;
|
||||
|
||||
// Move to the next instruction, unless we have a branch.
|
||||
fetch_PC = next_PC;
|
||||
fetch_NPC = next_NPC;
|
||||
fetch_MicroPC = next_MicroPC;
|
||||
|
||||
if (instruction->isQuiesce()) {
|
||||
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
|
||||
curTick);
|
||||
fetchStatus[tid] = QuiescePending;
|
||||
++numInst;
|
||||
status_change = true;
|
||||
break;
|
||||
}
|
||||
|
||||
++numInst;
|
||||
} while (staticInst->isMicroOp() &&
|
||||
!staticInst->isLastMicroOp() &&
|
||||
numInst < fetchWidth);
|
||||
offset += instSize;
|
||||
}
|
||||
|
||||
if (offset >= cacheBlkSize) {
|
||||
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
|
||||
"block.\n", tid);
|
||||
if (predicted_branch) {
|
||||
DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
|
||||
"instruction encountered.\n", tid);
|
||||
} else if (numInst >= fetchWidth) {
|
||||
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
|
||||
"for this cycle.\n", tid);
|
||||
} else if (predicted_branch) {
|
||||
DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
|
||||
"instruction encountered.\n", tid);
|
||||
} else if (offset >= cacheBlkSize) {
|
||||
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
|
||||
"block.\n", tid);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1212,12 +1216,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
if (fault == NoFault) {
|
||||
PC[tid] = next_PC;
|
||||
nextPC[tid] = next_NPC;
|
||||
nextNPC[tid] = next_NPC + instSize;
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
|
||||
#else
|
||||
microPC[tid] = next_MicroPC;
|
||||
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
|
||||
#endif
|
||||
} else {
|
||||
// We shouldn't be in an icache miss and also have a fault (an ITB
|
||||
// miss)
|
||||
|
@ -1235,8 +1235,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
|||
// We will use a nop in order to carry the fault.
|
||||
ext_inst = TheISA::NoopMachInst;
|
||||
|
||||
StaticInstPtr staticInst = new StaticInst(ext_inst);
|
||||
// Create a new DynInst from the dummy nop.
|
||||
DynInstPtr instruction = new DynInst(ext_inst,
|
||||
DynInstPtr instruction = new DynInst(staticInst,
|
||||
fetch_PC, fetch_NPC,
|
||||
next_PC, next_NPC,
|
||||
inst_seq, cpu);
|
||||
|
|
|
@ -402,9 +402,6 @@ class DefaultIEW
|
|||
/** Records if there is a fetch redirect on this cycle for each thread. */
|
||||
bool fetchRedirect[Impl::MaxThreads];
|
||||
|
||||
/** Keeps track of the last valid branch delay slot insts for threads */
|
||||
InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** Used to track if all instructions have been dispatched this cycle.
|
||||
* If they have not, then blocking must have occurred, and the instructions
|
||||
* would already be added to the skid buffer.
|
||||
|
|
|
@ -69,7 +69,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, Params *params)
|
|||
dispatchStatus[i] = Running;
|
||||
stalls[i].commit = false;
|
||||
fetchRedirect[i] = false;
|
||||
bdelayDoneSeqNum[i] = 0;
|
||||
}
|
||||
|
||||
wbMax = wbWidth * params->wbDepth;
|
||||
|
@ -410,31 +409,14 @@ DefaultIEW<Impl>::squash(unsigned tid)
|
|||
instQueue.squash(tid);
|
||||
|
||||
// Tell the LDSTQ to start squashing.
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
|
||||
#else
|
||||
ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
|
||||
#endif
|
||||
updatedQueues = true;
|
||||
|
||||
// Clear the skid buffer in case it has any data in it.
|
||||
DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
|
||||
tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
|
||||
tid, fromCommit->commitInfo[tid].doneSeqNum);
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if (skidBuffer[tid].front()->seqNum <=
|
||||
fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
|
||||
DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
|
||||
"that occur before delay slot [sn:%i].\n",
|
||||
fromCommit->commitInfo[tid].bdelayDoneSeqNum,
|
||||
tid);
|
||||
break;
|
||||
} else {
|
||||
DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
|
||||
"skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
|
||||
}
|
||||
#endif
|
||||
if (skidBuffer[tid].front()->isLoad() ||
|
||||
skidBuffer[tid].front()->isStore() ) {
|
||||
toRename->iewInfo[tid].dispatchedToLSQ++;
|
||||
|
@ -445,8 +427,6 @@ DefaultIEW<Impl>::squash(unsigned tid)
|
|||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
|
||||
|
||||
emptyRenameInsts(tid);
|
||||
}
|
||||
|
||||
|
@ -462,38 +442,19 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
|
|||
toCommit->mispredPC[tid] = inst->readPC();
|
||||
toCommit->branchMispredict[tid] = true;
|
||||
|
||||
int instSize = sizeof(TheISA::MachInst);
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
bool branch_taken =
|
||||
int instSize = sizeof(TheISA::MachInst);
|
||||
toCommit->branchTaken[tid] =
|
||||
!(inst->readNextPC() + instSize == inst->readNextNPC() &&
|
||||
(inst->readNextPC() == inst->readPC() + instSize ||
|
||||
inst->readNextPC() == inst->readPC() + 2 * instSize));
|
||||
DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
|
||||
branch_taken ? "true": "false", inst->seqNum);
|
||||
|
||||
toCommit->branchTaken[tid] = branch_taken;
|
||||
|
||||
bool squashDelaySlot = true;
|
||||
// (inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
|
||||
DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
|
||||
squashDelaySlot ? "true": "false", inst->seqNum);
|
||||
toCommit->squashDelaySlot[tid] = squashDelaySlot;
|
||||
//If we're squashing the delay slot, we need to pick back up at NextPC.
|
||||
//Otherwise, NextPC isn't being squashed, so we should pick back up at
|
||||
//NextNPC.
|
||||
if (squashDelaySlot) {
|
||||
toCommit->nextPC[tid] = inst->readNextPC();
|
||||
toCommit->nextNPC[tid] = inst->readNextNPC();
|
||||
} else {
|
||||
toCommit->nextPC[tid] = inst->readNextNPC();
|
||||
toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
|
||||
}
|
||||
#else
|
||||
toCommit->branchTaken[tid] = inst->readNextPC() !=
|
||||
(inst->readPC() + sizeof(TheISA::MachInst));
|
||||
toCommit->nextPC[tid] = inst->readNextPC();
|
||||
toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
|
||||
#endif
|
||||
toCommit->nextPC[tid] = inst->readNextPC();
|
||||
toCommit->nextNPC[tid] = inst->readNextNPC();
|
||||
toCommit->nextMicroPC[tid] = inst->readNextMicroPC();
|
||||
|
||||
toCommit->includeSquashInst[tid] = false;
|
||||
|
||||
|
@ -510,11 +471,7 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
|
|||
toCommit->squash[tid] = true;
|
||||
toCommit->squashedSeqNum[tid] = inst->seqNum;
|
||||
toCommit->nextPC[tid] = inst->readNextPC();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
toCommit->nextNPC[tid] = inst->readNextNPC();
|
||||
#else
|
||||
toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
toCommit->branchMispredict[tid] = false;
|
||||
|
||||
toCommit->includeSquashInst[tid] = false;
|
||||
|
@ -532,11 +489,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
|
|||
toCommit->squash[tid] = true;
|
||||
toCommit->squashedSeqNum[tid] = inst->seqNum;
|
||||
toCommit->nextPC[tid] = inst->readPC();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
toCommit->nextNPC[tid] = inst->readNextPC();
|
||||
#else
|
||||
toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
|
||||
#endif
|
||||
toCommit->branchMispredict[tid] = false;
|
||||
|
||||
// Must include the broadcasted SN in the squash.
|
||||
|
@ -880,10 +833,8 @@ DefaultIEW<Impl>::sortInsts()
|
|||
{
|
||||
int insts_from_rename = fromRename->size;
|
||||
#ifdef DEBUG
|
||||
#if !ISA_HAS_DELAY_SLOT
|
||||
for (int i = 0; i < numThreads; i++)
|
||||
assert(insts[i].empty());
|
||||
#endif
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_rename; ++i) {
|
||||
insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
|
||||
|
@ -894,21 +845,9 @@ template <class Impl>
|
|||
void
|
||||
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
|
||||
{
|
||||
DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
|
||||
"[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
|
||||
DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid);
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
|
||||
DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
|
||||
" that occurs at or before delay slot [sn:%i].\n",
|
||||
tid, bdelayDoneSeqNum[tid]);
|
||||
break;
|
||||
} else {
|
||||
DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
|
||||
"[sn:%i].\n", tid, insts[tid].front()->seqNum);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (insts[tid].front()->isLoad() ||
|
||||
insts[tid].front()->isStore() ) {
|
||||
|
|
|
@ -1005,11 +1005,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
|
|||
|
||||
// Read instruction sequence number of last instruction out of the
|
||||
// time buffer.
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
|
||||
#else
|
||||
squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
|
||||
#endif
|
||||
|
||||
// Call doSquash if there are insts in the IQ
|
||||
if (count[tid] > 0) {
|
||||
|
|
|
@ -69,10 +69,16 @@ class MipsDynInst : public BaseDynInst<Impl>
|
|||
};
|
||||
|
||||
public:
|
||||
/** BaseDynInst constructor given a static instruction. */
|
||||
MipsDynInst(StaticInstPtr staticInst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
MipsDynInst(ExtMachInst inst,
|
||||
Addr PC, Addr NPC,
|
||||
Addr Pred_PC, Addr Pred_NPC,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
|
|
|
@ -31,11 +31,23 @@
|
|||
#include "cpu/o3/mips/dyn_inst.hh"
|
||||
|
||||
template <class Impl>
|
||||
MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
|
||||
Addr PC, Addr NPC,
|
||||
Addr Pred_PC, Addr Pred_NPC,
|
||||
MipsDynInst<Impl>::MipsDynInst(StaticInstPtr staticInst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
|
||||
: BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
|
|
@ -356,47 +356,12 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
|
|||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them. Since we support multiple ISAs, we cant just:
|
||||
// "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is
|
||||
// a possible delay slot inst for different architectures
|
||||
// insts[tid].clear();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
|
||||
"[sn:%i].\n",tid, squash_seq_num);
|
||||
ListIt ilist_it = insts[tid].begin();
|
||||
while (ilist_it != insts[tid].end()) {
|
||||
if ((*ilist_it)->seqNum > squash_seq_num) {
|
||||
(*ilist_it)->setSquashed();
|
||||
DPRINTF(Rename, "Squashing incoming decode instruction, "
|
||||
"[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
|
||||
}
|
||||
ilist_it++;
|
||||
}
|
||||
#else
|
||||
// insts in them.
|
||||
insts[tid].clear();
|
||||
#endif
|
||||
|
||||
// Clear the skid buffer in case it has any data in it.
|
||||
// See comments above.
|
||||
// skidBuffer[tid].clear();
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
|
||||
"until [sn:%i].\n", tid, squash_seq_num);
|
||||
ListIt slist_it = skidBuffer[tid].begin();
|
||||
while (slist_it != skidBuffer[tid].end()) {
|
||||
if ((*slist_it)->seqNum > squash_seq_num) {
|
||||
(*slist_it)->setSquashed();
|
||||
DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
|
||||
"PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
|
||||
}
|
||||
slist_it++;
|
||||
}
|
||||
resumeUnblocking = (skidBuffer[tid].size() != 0);
|
||||
DPRINTF(Rename, "Resume unblocking set to %s\n",
|
||||
resumeUnblocking ? "true" : "false");
|
||||
#else
|
||||
skidBuffer[tid].clear();
|
||||
#endif
|
||||
|
||||
doSquash(squash_seq_num, tid);
|
||||
}
|
||||
|
||||
|
@ -776,10 +741,8 @@ DefaultRename<Impl>::sortInsts()
|
|||
{
|
||||
int insts_from_decode = fromDecode->size;
|
||||
#ifdef DEBUG
|
||||
#if !ISA_HAS_DELAY_SLOT
|
||||
for (int i=0; i < numThreads; i++)
|
||||
assert(insts[i].empty());
|
||||
#endif
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_decode; ++i) {
|
||||
DynInstPtr inst = fromDecode->insts[i];
|
||||
|
@ -1000,6 +963,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
|
|||
// Floating point and Miscellaneous registers need their indexes
|
||||
// adjusted to account for the expanded number of flattened int regs.
|
||||
flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
|
||||
DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg);
|
||||
}
|
||||
|
||||
inst->flattenSrcReg(src_idx, flat_src_reg);
|
||||
|
@ -1016,9 +980,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
|
|||
|
||||
// See if the register is ready or not.
|
||||
if (scoreboard->getReg(renamed_reg) == true) {
|
||||
DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
|
||||
DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg);
|
||||
|
||||
inst->markSrcRegReady(src_idx);
|
||||
} else {
|
||||
DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg);
|
||||
}
|
||||
|
||||
++renameRenameLookups;
|
||||
|
@ -1045,6 +1011,7 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
|
|||
// Floating point and Miscellaneous registers need their indexes
|
||||
// adjusted to account for the expanded number of flattened int regs.
|
||||
flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
|
||||
DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg);
|
||||
}
|
||||
|
||||
inst->flattenDestReg(dest_idx, flat_dest_reg);
|
||||
|
@ -1248,13 +1215,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
|
|||
DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
|
||||
"commit.\n", tid);
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
|
||||
#else
|
||||
InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
|
||||
#endif
|
||||
|
||||
squash(squashed_seq_num, tid);
|
||||
squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst<Impl>
|
|||
|
||||
public:
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
|
||||
SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
SparcDynInst(StaticInstPtr &_staticInst);
|
||||
|
|
|
@ -31,10 +31,23 @@
|
|||
#include "cpu/o3/sparc/dyn_inst.hh"
|
||||
|
||||
template <class Impl>
|
||||
SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
|
||||
SparcDynInst<Impl>::SparcDynInst(StaticInstPtr staticInst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
|
||||
: BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
|
||||
Addr PC, Addr NPC, Addr microPC,
|
||||
Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(inst, PC, NPC, microPC,
|
||||
Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
|
||||
global.BPredUnit.BTBHits 2990 # Number of BTB hits
|
||||
global.BPredUnit.BTBLookups 7055 # Number of BTB lookups
|
||||
global.BPredUnit.BTBHits 3021 # Number of BTB hits
|
||||
global.BPredUnit.BTBLookups 7086 # Number of BTB lookups
|
||||
global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions.
|
||||
global.BPredUnit.condIncorrect 2077 # Number of conditional branches incorrect
|
||||
global.BPredUnit.condPredicted 7846 # Number of conditional branches predicted
|
||||
global.BPredUnit.lookups 7846 # Number of BP lookups
|
||||
global.BPredUnit.condPredicted 7877 # Number of conditional branches predicted
|
||||
global.BPredUnit.lookups 7877 # Number of BP lookups
|
||||
global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target.
|
||||
host_inst_rate 15119 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 154868 # Number of bytes of host memory used
|
||||
host_seconds 0.73 # Real time elapsed on the host
|
||||
host_tick_rate 1956796 # Simulator tick rate (ticks/s)
|
||||
host_inst_rate 4388 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 179936 # Number of bytes of host memory used
|
||||
host_seconds 2.50 # Real time elapsed on the host
|
||||
host_tick_rate 568121 # Simulator tick rate (ticks/s)
|
||||
memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads.
|
||||
memdepunit.memDep.conflictingStores 0 # Number of conflicting stores.
|
||||
memdepunit.memDep.insertedLoads 3250 # Number of loads inserted to the mem dependence unit.
|
||||
|
@ -19,22 +19,22 @@ memdepunit.memDep.insertedStores 2817 # Nu
|
|||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 10976 # Number of instructions simulated
|
||||
sim_seconds 0.000001 # Number of seconds simulated
|
||||
sim_ticks 1421211 # Number of ticks simulated
|
||||
sim_ticks 1421207 # Number of ticks simulated
|
||||
system.cpu.commit.COM:branches 2152 # Number of branches committed
|
||||
system.cpu.commit.COM:bw_lim_events 172 # number cycles where commit BW limit reached
|
||||
system.cpu.commit.COM:bw_lim_events 225 # number cycles where commit BW limit reached
|
||||
system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits
|
||||
system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle
|
||||
system.cpu.commit.COM:committed_per_cycle.samples 221349
|
||||
system.cpu.commit.COM:committed_per_cycle.samples 220766
|
||||
system.cpu.commit.COM:committed_per_cycle.min_value 0
|
||||
0 215844 9751.30%
|
||||
1 2970 134.18%
|
||||
2 1290 58.28%
|
||||
3 631 28.51%
|
||||
4 208 9.40%
|
||||
5 90 4.07%
|
||||
6 133 6.01%
|
||||
0 215368 9755.49%
|
||||
1 2915 132.04%
|
||||
2 1196 54.18%
|
||||
3 673 30.48%
|
||||
4 208 9.42%
|
||||
5 79 3.58%
|
||||
6 91 4.12%
|
||||
7 11 0.50%
|
||||
8 172 7.77%
|
||||
8 225 10.19%
|
||||
system.cpu.commit.COM:committed_per_cycle.max_value 8
|
||||
system.cpu.commit.COM:committed_per_cycle.end_dist
|
||||
|
||||
|
@ -49,65 +49,65 @@ system.cpu.commit.commitNonSpecStalls 327 # Th
|
|||
system.cpu.commit.commitSquashedInsts 14263 # The number of squashed insts skipped by commit
|
||||
system.cpu.committedInsts 10976 # Number of Instructions Simulated
|
||||
system.cpu.committedInsts_total 10976 # Number of Instructions Simulated
|
||||
system.cpu.cpi 129.483509 # CPI: Cycles Per Instruction
|
||||
system.cpu.cpi_total 129.483509 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 2737 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 6585.044776 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6511.939394 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 2603 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 882396 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.048959 # miss rate for ReadReq accesses
|
||||
system.cpu.cpi 129.483145 # CPI: Cycles Per Instruction
|
||||
system.cpu.cpi_total 129.483145 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 2738 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 6586.074627 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6513.166667 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 2604 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 882534 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.048941 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_hits 68 # number of ReadReq MSHR hits
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 429788 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.024114 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 429869 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.024105 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses)
|
||||
system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits
|
||||
system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 7960.583924 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7136.918605 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 7962.583924 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7138.593023 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 869 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 3367327 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 3368173 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.327399 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 423 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_hits 337 # number of WriteReq MSHR hits
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 613775 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 613919 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0> # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 22.881579 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 22.888158 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 4029 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 7629.664273 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 3472 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 4249723 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.138248 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_accesses 4030 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 7631.430880 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 6867.026316 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 3473 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 4250707 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.138213 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 557 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 405 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 1043563 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.037726 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_miss_latency 1043788 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.037717 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.overall_accesses 4029 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 7629.664273 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_accesses 4030 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 7631.430880 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 6867.026316 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 3472 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 4249723 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.138248 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_hits 3473 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 4250707 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.138213 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 557 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 405 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 1043563 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.037726 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_miss_latency 1043788 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.037717 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
|
@ -123,50 +123,50 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0
|
|||
system.cpu.dcache.replacements 0 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 90.938737 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 3478 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.tagsinuse 90.938565 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 3479 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 0 # number of writebacks
|
||||
system.cpu.decode.DECODE:BlockedCycles 192719 # Number of cycles decode is blocked
|
||||
system.cpu.decode.DECODE:DecodedInsts 39774 # Number of instructions handled by decode
|
||||
system.cpu.decode.DECODE:IdleCycles 20128 # Number of cycles decode is idle
|
||||
system.cpu.decode.DECODE:RunCycles 8238 # Number of cycles decode is running
|
||||
system.cpu.decode.DECODE:BlockedCycles 192302 # Number of cycles decode is blocked
|
||||
system.cpu.decode.DECODE:DecodedInsts 39763 # Number of instructions handled by decode
|
||||
system.cpu.decode.DECODE:IdleCycles 19973 # Number of cycles decode is idle
|
||||
system.cpu.decode.DECODE:RunCycles 8441 # Number of cycles decode is running
|
||||
system.cpu.decode.DECODE:SquashCycles 3162 # Number of cycles decode is squashing
|
||||
system.cpu.decode.DECODE:UnblockCycles 264 # Number of cycles decode is unblocking
|
||||
system.cpu.fetch.Branches 7846 # Number of branches that fetch encountered
|
||||
system.cpu.decode.DECODE:UnblockCycles 50 # Number of cycles decode is unblocking
|
||||
system.cpu.fetch.Branches 7877 # Number of branches that fetch encountered
|
||||
system.cpu.fetch.CacheLines 5085 # Number of cache lines fetched
|
||||
system.cpu.fetch.Cycles 14399 # Number of cycles fetch has run and was not squashing or blocked
|
||||
system.cpu.fetch.Cycles 14430 # Number of cycles fetch has run and was not squashing or blocked
|
||||
system.cpu.fetch.IcacheSquashes 745 # Number of outstanding Icache misses that were squashed
|
||||
system.cpu.fetch.Insts 43304 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.Insts 43366 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.SquashCycles 2134 # Number of cycles fetch has spent squashing
|
||||
system.cpu.fetch.branchRate 0.034947 # Number of branch fetches per cycle
|
||||
system.cpu.fetch.branchRate 0.035176 # Number of branch fetches per cycle
|
||||
system.cpu.fetch.icacheStallCycles 5085 # Number of cycles fetch is stalled on an Icache miss
|
||||
system.cpu.fetch.predictedBranches 2990 # Number of branches that fetch has predicted taken
|
||||
system.cpu.fetch.rate 0.192881 # Number of inst fetches per cycle
|
||||
system.cpu.fetch.predictedBranches 3021 # Number of branches that fetch has predicted taken
|
||||
system.cpu.fetch.rate 0.193660 # Number of inst fetches per cycle
|
||||
system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total)
|
||||
system.cpu.fetch.rateDist.samples 224511
|
||||
system.cpu.fetch.rateDist.samples 223928
|
||||
system.cpu.fetch.rateDist.min_value 0
|
||||
0 215198 9585.19%
|
||||
1 2258 100.57%
|
||||
2 627 27.93%
|
||||
3 958 42.67%
|
||||
4 553 24.63%
|
||||
5 816 36.35%
|
||||
6 951 42.36%
|
||||
7 280 12.47%
|
||||
8 2870 127.83%
|
||||
0 214584 9582.72%
|
||||
1 2258 100.84%
|
||||
2 658 29.38%
|
||||
3 958 42.78%
|
||||
4 553 24.70%
|
||||
5 816 36.44%
|
||||
6 951 42.47%
|
||||
7 280 12.50%
|
||||
8 2870 128.17%
|
||||
system.cpu.fetch.rateDist.max_value 8
|
||||
system.cpu.fetch.rateDist.end_dist
|
||||
|
||||
system.cpu.icache.ReadReq_accesses 5085 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 5148.266776 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 4502.972752 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 5150.152209 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 4503.673025 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 4474 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 3145591 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_latency 3146743 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.120157 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 611 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 244 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 1652591 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 1652848 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.072173 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 367 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
|
||||
|
@ -178,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n
|
|||
system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 5085 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 5148.266776 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_avg_miss_latency 5150.152209 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 4503.673025 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 4474 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 3145591 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_latency 3146743 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.120157 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 611 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 244 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 1652591 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_latency 1652848 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.072173 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 367 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.overall_accesses 5085 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 5148.266776 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_miss_latency 5150.152209 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 4503.673025 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 4474 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 3145591 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_latency 3146743 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.120157 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 611 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 244 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 1652591 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_latency 1652848 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.072173 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 367 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -217,35 +217,35 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0
|
|||
system.cpu.icache.replacements 1 # number of replacements
|
||||
system.cpu.icache.sampled_refs 363 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 172.869174 # Cycle average of tags in use
|
||||
system.cpu.icache.tagsinuse 172.868641 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 4474 # Total number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.idleCycles 1196701 # Total number of cycles that the CPU has spent unscheduled due to idling
|
||||
system.cpu.iew.EXEC:branches 3576 # Number of branches executed
|
||||
system.cpu.idleCycles 1197280 # Total number of cycles that the CPU has spent unscheduled due to idling
|
||||
system.cpu.iew.EXEC:branches 3577 # Number of branches executed
|
||||
system.cpu.iew.EXEC:nop 0 # number of nop insts executed
|
||||
system.cpu.iew.EXEC:rate 0.092548 # Inst execution rate
|
||||
system.cpu.iew.EXEC:refs 5257 # number of memory reference insts executed
|
||||
system.cpu.iew.EXEC:rate 0.092802 # Inst execution rate
|
||||
system.cpu.iew.EXEC:refs 5258 # number of memory reference insts executed
|
||||
system.cpu.iew.EXEC:stores 2386 # Number of stores executed
|
||||
system.cpu.iew.EXEC:swp 0 # number of swp insts executed
|
||||
system.cpu.iew.WB:consumers 9737 # num instructions consuming a value
|
||||
system.cpu.iew.WB:count 19769 # cumulative count of insts written-back
|
||||
system.cpu.iew.WB:count 19771 # cumulative count of insts written-back
|
||||
system.cpu.iew.WB:fanout 0.790901 # average fanout of values written-back
|
||||
system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ
|
||||
system.cpu.iew.WB:producers 7701 # num instructions producing a value
|
||||
system.cpu.iew.WB:rate 0.088054 # insts written-back per cycle
|
||||
system.cpu.iew.WB:sent 20061 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.branchMispredicts 2593 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.WB:rate 0.088292 # insts written-back per cycle
|
||||
system.cpu.iew.WB:sent 20063 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.branchMispredicts 2594 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.iewBlockCycles 476 # Number of cycles IEW is blocking
|
||||
system.cpu.iew.iewDispLoadInsts 3250 # Number of dispatched load instructions
|
||||
system.cpu.iew.iewDispNonSpecInsts 617 # Number of dispatched non-speculative instructions
|
||||
system.cpu.iew.iewDispSquashedInsts 2705 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu.iew.iewDispSquashedInsts 2694 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu.iew.iewDispStoreInsts 2817 # Number of dispatched store instructions
|
||||
system.cpu.iew.iewDispatchedInsts 25240 # Number of instructions dispatched to IQ
|
||||
system.cpu.iew.iewExecLoadInsts 2871 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 1780 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.iewExecutedInsts 20778 # Number of executed instructions
|
||||
system.cpu.iew.iewExecLoadInsts 2872 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 1777 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.iewExecutedInsts 20781 # Number of executed instructions
|
||||
system.cpu.iew.iewIQFullEvents 7 # Number of times the IQ has become full, causing a stall
|
||||
system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle
|
||||
system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall
|
||||
|
@ -262,7 +262,7 @@ system.cpu.iew.lsq.thread.0.rescheduledLoads 0
|
|||
system.cpu.iew.lsq.thread.0.squashedLoads 1788 # Number of loads squashed
|
||||
system.cpu.iew.lsq.thread.0.squashedStores 1519 # Number of stores squashed
|
||||
system.cpu.iew.memOrderViolationEvents 54 # Number of memory order violations
|
||||
system.cpu.iew.predictedNotTakenIncorrect 962 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.predictedNotTakenIncorrect 963 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.predictedTakenIncorrect 1631 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu.ipc 0.007723 # IPC: Instructions Per Cycle
|
||||
system.cpu.ipc_total 0.007723 # IPC: Total IPC of All Threads
|
||||
|
@ -302,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist
|
|||
InstPrefetch 0 0.00% # attempts to use FU when none available
|
||||
system.cpu.iq.ISSUE:fu_full.end_dist
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.samples 224511
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.samples 223928
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.min_value 0
|
||||
0 215315 9590.40%
|
||||
1 4124 183.69%
|
||||
2 1297 57.77%
|
||||
3 1306 58.17%
|
||||
4 1190 53.00%
|
||||
5 707 31.49%
|
||||
6 433 19.29%
|
||||
7 83 3.70%
|
||||
8 56 2.49%
|
||||
0 214838 9594.07%
|
||||
1 3976 177.56%
|
||||
2 1244 55.55%
|
||||
3 1359 60.69%
|
||||
4 1316 58.77%
|
||||
5 612 27.33%
|
||||
6 444 19.83%
|
||||
7 83 3.71%
|
||||
8 56 2.50%
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.max_value 8
|
||||
system.cpu.iq.ISSUE:issued_per_cycle.end_dist
|
||||
|
||||
system.cpu.iq.ISSUE:rate 0.100476 # Inst issue rate
|
||||
system.cpu.iq.ISSUE:rate 0.100738 # Inst issue rate
|
||||
system.cpu.iq.iqInstsAdded 24623 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqInstsIssued 22558 # Number of instructions issued
|
||||
system.cpu.iq.iqNonSpecInstsAdded 617 # Number of non-speculative instructions added to the IQ
|
||||
|
@ -325,12 +325,12 @@ system.cpu.iq.iqSquashedInstsIssued 174 # Nu
|
|||
system.cpu.iq.iqSquashedNonSpecRemoved 290 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.iqSquashedOperandsExamined 5834 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.l2cache.ReadReq_accesses 513 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 4754.779727 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.506823 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_miss_latency 2439202 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 4755.715400 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.752437 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_miss_latency 2439682 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 513 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1202219 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1202345 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 513 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0> # average number of cycles each access was blocked
|
||||
|
@ -342,29 +342,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 #
|
|||
system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 513 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 4754.779727 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 4755.715400 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.752437 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 2439202 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 2439682 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 513 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 1202219 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 1202345 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 513 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.overall_accesses 513 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 4754.779727 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 4755.715400 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.752437 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 0 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 2439202 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 2439682 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 513 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 1202219 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 1202345 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 513 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -381,28 +381,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0
|
|||
system.cpu.l2cache.replacements 0 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 512 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 262.946375 # Cycle average of tags in use
|
||||
system.cpu.l2cache.tagsinuse 262.945674 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 0 # number of writebacks
|
||||
system.cpu.numCycles 224511 # number of cpu cycles simulated
|
||||
system.cpu.numCycles 223928 # number of cpu cycles simulated
|
||||
system.cpu.rename.RENAME:BlockCycles 960 # Number of cycles rename is blocking
|
||||
system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed
|
||||
system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full
|
||||
system.cpu.rename.RENAME:IdleCycles 20098 # Number of cycles rename is idle
|
||||
system.cpu.rename.RENAME:LSQFullEvents 481 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.RENAME:IdleCycles 21302 # Number of cycles rename is idle
|
||||
system.cpu.rename.RENAME:LSQFullEvents 411 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.RENAME:ROBFullEvents 4 # Number of times rename has blocked due to ROB full
|
||||
system.cpu.rename.RENAME:RenameLookups 46931 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.RENAME:RenamedInsts 31260 # Number of instructions processed by rename
|
||||
system.cpu.rename.RENAME:RenamedInsts 31249 # Number of instructions processed by rename
|
||||
system.cpu.rename.RENAME:RenamedOperands 25831 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RENAME:RunCycles 7921 # Number of cycles rename is running
|
||||
system.cpu.rename.RENAME:RunCycles 7136 # Number of cycles rename is running
|
||||
system.cpu.rename.RENAME:SquashCycles 3162 # Number of cycles rename is squashing
|
||||
system.cpu.rename.RENAME:SquashedInsts 8042 # Number of squashed instructions processed by rename
|
||||
system.cpu.rename.RENAME:UnblockCycles 1212 # Number of cycles rename is unblocking
|
||||
system.cpu.rename.RENAME:UnblockCycles 614 # Number of cycles rename is unblocking
|
||||
system.cpu.rename.RENAME:UndoneMaps 15963 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.RENAME:serializeStallCycles 190573 # count of cycles rename stalled for serializing inst
|
||||
system.cpu.rename.RENAME:serializeStallCycles 190754 # count of cycles rename stalled for serializing inst
|
||||
system.cpu.rename.RENAME:serializingInsts 638 # count of serializing insts renamed
|
||||
system.cpu.rename.RENAME:skidInsts 5594 # count of insts added to the skid buffer
|
||||
system.cpu.rename.RENAME:skidInsts 5529 # count of insts added to the skid buffer
|
||||
system.cpu.rename.RENAME:tempSerializingInsts 629 # count of temporary serializing insts renamed
|
||||
system.cpu.timesIdled 289 # Number of times that the entire CPU went into an idle state and unscheduled itself
|
||||
system.cpu.workload.PROG:num_syscalls 8 # Number of system calls
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
warn: More than two loadable segments in ELF object.
|
||||
warn: Ignoring segment @ 0x0 length 0x0.
|
||||
0: system.remote_gdb.listener: listening for remote gdb on port 7003
|
||||
warn: Entering event queue @ 0. Starting simulation...
|
||||
|
|
|
@ -16,9 +16,9 @@ The Regents of The University of Michigan
|
|||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Apr 9 2007 03:06:26
|
||||
M5 started Mon Apr 9 03:06:54 2007
|
||||
M5 executing on zizzer.eecs.umich.edu
|
||||
M5 compiled Apr 13 2007 13:56:34
|
||||
M5 started Fri Apr 13 13:56:35 2007
|
||||
M5 executing on ahchoo.blinky.homelinux.org
|
||||
command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
Exiting @ tick 1421211 because target called exit()
|
||||
Exiting @ tick 1421207 because target called exit()
|
||||
|
|
Loading…
Reference in a new issue