merge
This commit is contained in:
commit
4e00cc9900
45 changed files with 2659 additions and 1399 deletions
|
@ -30,10 +30,15 @@ from m5.params import *
|
|||
from m5.proxy import *
|
||||
from BaseCPU import BaseCPU
|
||||
|
||||
class ThreadModel(Enum):
|
||||
vals = ['Single', 'SMT', 'SwitchOnCacheMiss']
|
||||
|
||||
class InOrderCPU(BaseCPU):
|
||||
type = 'InOrderCPU'
|
||||
activity = Param.Unsigned(0, "Initial count")
|
||||
|
||||
threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)")
|
||||
|
||||
cachePorts = Param.Unsigned(2, "Cache Ports")
|
||||
stageWidth = Param.Unsigned(1, "Stage width")
|
||||
|
||||
|
|
|
@ -52,12 +52,16 @@ if 'InOrderCPU' in env['CPU_MODELS']:
|
|||
TraceFlag('InOrderUseDef')
|
||||
TraceFlag('InOrderMDU')
|
||||
TraceFlag('InOrderGraduation')
|
||||
TraceFlag('ThreadModel')
|
||||
TraceFlag('RefCount')
|
||||
TraceFlag('AddrDep')
|
||||
|
||||
|
||||
CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU',
|
||||
'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred',
|
||||
'InOrderDecode', 'InOrderExecute', 'InOrderInstBuffer', 'InOrderUseDef',
|
||||
'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource'])
|
||||
'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource',
|
||||
'ThreadModel'])
|
||||
|
||||
Source('pipeline_traits.cc')
|
||||
Source('inorder_dyn_inst.cc')
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -93,13 +93,27 @@ class InOrderCPU : public BaseCPU
|
|||
public:
|
||||
/** Constructs a CPU with the given parameters. */
|
||||
InOrderCPU(Params *params);
|
||||
/* Destructor */
|
||||
~InOrderCPU();
|
||||
|
||||
/** CPU ID */
|
||||
int cpu_id;
|
||||
|
||||
// SE Mode ASIDs
|
||||
ThreadID asid[ThePipeline::MaxThreads];
|
||||
|
||||
/** Type of core that this is */
|
||||
std::string coreType;
|
||||
|
||||
// Only needed for SE MODE
|
||||
enum ThreadModel {
|
||||
Single,
|
||||
SMT,
|
||||
SwitchOnCacheMiss
|
||||
};
|
||||
|
||||
ThreadModel threadModel;
|
||||
|
||||
int readCpuId() { return cpu_id; }
|
||||
|
||||
void setCpuId(int val) { cpu_id = val; }
|
||||
|
@ -117,7 +131,6 @@ class InOrderCPU : public BaseCPU
|
|||
|
||||
/** Overall CPU status. */
|
||||
Status _status;
|
||||
|
||||
private:
|
||||
/** Define TickEvent for the CPU */
|
||||
class TickEvent : public Event
|
||||
|
@ -144,9 +157,11 @@ class InOrderCPU : public BaseCPU
|
|||
void scheduleTickEvent(int delay)
|
||||
{
|
||||
if (tickEvent.squashed())
|
||||
mainEventQueue.reschedule(&tickEvent, nextCycle(curTick + ticks(delay)));
|
||||
mainEventQueue.reschedule(&tickEvent,
|
||||
nextCycle(curTick + ticks(delay)));
|
||||
else if (!tickEvent.scheduled())
|
||||
mainEventQueue.schedule(&tickEvent, nextCycle(curTick + ticks(delay)));
|
||||
mainEventQueue.schedule(&tickEvent,
|
||||
nextCycle(curTick + ticks(delay)));
|
||||
}
|
||||
|
||||
/** Unschedule tick event, regardless of its current state. */
|
||||
|
@ -165,15 +180,13 @@ class InOrderCPU : public BaseCPU
|
|||
// pool event.
|
||||
enum CPUEventType {
|
||||
ActivateThread,
|
||||
DeallocateThread,
|
||||
ActivateNextReadyThread,
|
||||
DeactivateThread,
|
||||
HaltThread,
|
||||
SuspendThread,
|
||||
DisableThreads,
|
||||
EnableThreads,
|
||||
DisableVPEs,
|
||||
EnableVPEs,
|
||||
Trap,
|
||||
InstGraduated,
|
||||
SquashAll,
|
||||
SquashFromMemStall,
|
||||
UpdatePCs,
|
||||
NumCPUEvents
|
||||
};
|
||||
|
@ -189,22 +202,24 @@ class InOrderCPU : public BaseCPU
|
|||
public:
|
||||
CPUEventType cpuEventType;
|
||||
ThreadID tid;
|
||||
unsigned vpe;
|
||||
DynInstPtr inst;
|
||||
Fault fault;
|
||||
unsigned vpe;
|
||||
|
||||
public:
|
||||
/** Constructs a CPU event. */
|
||||
CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault,
|
||||
ThreadID _tid, unsigned _vpe);
|
||||
ThreadID _tid, DynInstPtr inst, unsigned event_pri_offset);
|
||||
|
||||
/** Set Type of Event To Be Scheduled */
|
||||
void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid,
|
||||
unsigned _vpe)
|
||||
DynInstPtr _inst)
|
||||
{
|
||||
fault = _fault;
|
||||
cpuEventType = e_type;
|
||||
tid = _tid;
|
||||
vpe = _vpe;
|
||||
inst = _inst;
|
||||
vpe = 0;
|
||||
}
|
||||
|
||||
/** Processes a resource event. */
|
||||
|
@ -222,17 +237,21 @@ class InOrderCPU : public BaseCPU
|
|||
|
||||
/** Schedule a CPU Event */
|
||||
void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid,
|
||||
unsigned vpe, unsigned delay = 0);
|
||||
DynInstPtr inst, unsigned delay = 0,
|
||||
unsigned event_pri_offset = 0);
|
||||
|
||||
public:
|
||||
/** Interface between the CPU and CPU resources. */
|
||||
ResourcePool *resPool;
|
||||
|
||||
/** Instruction used to signify that there is no *real* instruction in buffer slot */
|
||||
/** Instruction used to signify that there is no *real* instruction in
|
||||
buffer slot */
|
||||
DynInstPtr dummyInst[ThePipeline::MaxThreads];
|
||||
DynInstPtr dummyBufferInst;
|
||||
DynInstPtr dummyReqInst;
|
||||
|
||||
/** Used by resources to signify a denied access to a resource. */
|
||||
ResourceRequest *dummyReq;
|
||||
ResourceRequest *dummyReq[ThePipeline::MaxThreads];
|
||||
|
||||
/** Identifies the resource id that identifies a fetch
|
||||
* access unit.
|
||||
|
@ -331,26 +350,39 @@ class InOrderCPU : public BaseCPU
|
|||
void trap(Fault fault, ThreadID tid, int delay = 0);
|
||||
void trapCPU(Fault fault, ThreadID tid);
|
||||
|
||||
/** Setup CPU to insert a thread's context */
|
||||
void insertThread(ThreadID tid);
|
||||
|
||||
/** Remove all of a thread's context from CPU */
|
||||
void removeThread(ThreadID tid);
|
||||
|
||||
/** Add Thread to Active Threads List. */
|
||||
void activateContext(ThreadID tid, int delay = 0);
|
||||
void activateThread(ThreadID tid);
|
||||
void activateThreadInPipeline(ThreadID tid);
|
||||
|
||||
/** Remove Thread from Active Threads List */
|
||||
/** Add Thread to Active Threads List. */
|
||||
void activateNextReadyContext(int delay = 0);
|
||||
void activateNextReadyThread();
|
||||
|
||||
/** Remove from Active Thread List */
|
||||
void deactivateContext(ThreadID tid, int delay = 0);
|
||||
void deactivateThread(ThreadID tid);
|
||||
|
||||
/** Suspend Thread, Remove from Active Threads List, Add to Suspend List */
|
||||
void suspendContext(ThreadID tid, int delay = 0);
|
||||
void suspendThread(ThreadID tid);
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Remove Thread Context from CPU.
|
||||
/** Halt Thread, Remove from Active Thread List, Place Thread on Halted
|
||||
* Threads List
|
||||
*/
|
||||
void deallocateContext(ThreadID tid, int delay = 0);
|
||||
void deallocateThread(ThreadID tid);
|
||||
void deactivateThread(ThreadID tid);
|
||||
void haltContext(ThreadID tid, int delay = 0);
|
||||
void haltThread(ThreadID tid);
|
||||
|
||||
/** squashFromMemStall() - sets up a squash event
|
||||
* squashDueToMemStall() - squashes pipeline
|
||||
* @note: maybe squashContext/squashThread would be better?
|
||||
*/
|
||||
void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0);
|
||||
void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
void removePipelineStalls(ThreadID tid);
|
||||
void squashThreadInPipeline(ThreadID tid);
|
||||
void squashBehindMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
PipelineStage* getPipeStage(int stage_num);
|
||||
|
||||
|
@ -361,37 +393,6 @@ class InOrderCPU : public BaseCPU
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Remove Thread Context from CPU.
|
||||
*/
|
||||
void haltContext(ThreadID tid, int delay = 0);
|
||||
|
||||
void removePipelineStalls(ThreadID tid);
|
||||
|
||||
void squashThreadInPipeline(ThreadID tid);
|
||||
|
||||
/// Notify the CPU to enable a virtual processor element.
|
||||
virtual void enableVirtProcElement(unsigned vpe);
|
||||
void enableVPEs(unsigned vpe);
|
||||
|
||||
/// Notify the CPU to disable a virtual processor element.
|
||||
virtual void disableVirtProcElement(ThreadID tid, unsigned vpe);
|
||||
void disableVPEs(ThreadID tid, unsigned vpe);
|
||||
|
||||
/// Notify the CPU that multithreading is enabled.
|
||||
virtual void enableMultiThreading(unsigned vpe);
|
||||
void enableThreads(unsigned vpe);
|
||||
|
||||
/// Notify the CPU that multithreading is disabled.
|
||||
virtual void disableMultiThreading(ThreadID tid, unsigned vpe);
|
||||
void disableThreads(ThreadID tid, unsigned vpe);
|
||||
|
||||
/** Activate a Thread When CPU Resources are Available. */
|
||||
void activateWhenReady(ThreadID tid);
|
||||
|
||||
/** Add or Remove a Thread Context in the CPU. */
|
||||
void doContextSwitch();
|
||||
|
||||
/** Update The Order In Which We Process Threads. */
|
||||
void updateThreadPriority();
|
||||
|
||||
|
@ -420,7 +421,11 @@ class InOrderCPU : public BaseCPU
|
|||
/** Get & Update Next Event Number */
|
||||
InstSeqNum getNextEventNum()
|
||||
{
|
||||
#ifdef DEBUG
|
||||
return cpuEventNum++;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Register file accessors */
|
||||
|
@ -550,8 +555,8 @@ class InOrderCPU : public BaseCPU
|
|||
*/
|
||||
std::queue<ListIt> removeList;
|
||||
|
||||
/** List of all the resource requests that will be removed at the end of this
|
||||
* cycle.
|
||||
/** List of all the resource requests that will be removed at the end
|
||||
* of this cycle.
|
||||
*/
|
||||
std::queue<ResourceRequest*> reqRemoveList;
|
||||
|
||||
|
@ -585,18 +590,19 @@ class InOrderCPU : public BaseCPU
|
|||
/** Active Threads List */
|
||||
std::list<ThreadID> activeThreads;
|
||||
|
||||
/** Current Threads List */
|
||||
std::list<ThreadID> currentThreads;
|
||||
/** Ready Threads List */
|
||||
std::list<ThreadID> readyThreads;
|
||||
|
||||
/** Suspended Threads List */
|
||||
std::list<ThreadID> suspendedThreads;
|
||||
|
||||
/** Thread Status Functions (Unused Currently) */
|
||||
bool isThreadInCPU(ThreadID tid);
|
||||
/** Halted Threads List */
|
||||
std::list<ThreadID> haltedThreads;
|
||||
|
||||
/** Thread Status Functions */
|
||||
bool isThreadActive(ThreadID tid);
|
||||
bool isThreadReady(ThreadID tid);
|
||||
bool isThreadSuspended(ThreadID tid);
|
||||
void addToCurrentThreads(ThreadID tid);
|
||||
void removeFromCurrentThreads(ThreadID tid);
|
||||
|
||||
private:
|
||||
/** The activity recorder; used to tell if the CPU has any
|
||||
|
@ -609,6 +615,19 @@ class InOrderCPU : public BaseCPU
|
|||
/** Number of Active Threads in the CPU */
|
||||
ThreadID numActiveThreads() { return activeThreads.size(); }
|
||||
|
||||
/** Thread id of active thread
|
||||
* Only used for SwitchOnCacheMiss model.
|
||||
* Assumes only 1 thread active
|
||||
*/
|
||||
ThreadID activeThreadId()
|
||||
{
|
||||
if (numActiveThreads() > 0)
|
||||
return activeThreads.front();
|
||||
else
|
||||
return InvalidThreadID;
|
||||
}
|
||||
|
||||
|
||||
/** Records that there was time buffer activity this cycle. */
|
||||
void activityThisCycle() { activityRec.activity(); }
|
||||
|
||||
|
@ -627,13 +646,14 @@ class InOrderCPU : public BaseCPU
|
|||
virtual void wakeup();
|
||||
#endif
|
||||
|
||||
/** Gets a free thread id. Use if thread ids change across system. */
|
||||
ThreadID getFreeTid();
|
||||
|
||||
// LL/SC debug functionality
|
||||
unsigned stCondFails;
|
||||
unsigned readStCondFailures() { return stCondFails; }
|
||||
unsigned setStCondFailures(unsigned st_fails) { return stCondFails = st_fails; }
|
||||
|
||||
unsigned readStCondFailures()
|
||||
{ return stCondFails; }
|
||||
|
||||
unsigned setStCondFailures(unsigned st_fails)
|
||||
{ return stCondFails = st_fails; }
|
||||
|
||||
/** Returns a pointer to a thread context. */
|
||||
ThreadContext *tcBase(ThreadID tid = 0)
|
||||
|
@ -663,9 +683,16 @@ class InOrderCPU : public BaseCPU
|
|||
/** The global sequence number counter. */
|
||||
InstSeqNum globalSeqNum[ThePipeline::MaxThreads];
|
||||
|
||||
#ifdef DEBUG
|
||||
/** The global event number counter. */
|
||||
InstSeqNum cpuEventNum;
|
||||
|
||||
/** Number of resource requests active in CPU **/
|
||||
unsigned resReqCount;
|
||||
|
||||
Stats::Scalar maxResReqCount;
|
||||
#endif
|
||||
|
||||
/** Counter of how many stages have completed switching out. */
|
||||
int switchCount;
|
||||
|
||||
|
@ -684,17 +711,13 @@ class InOrderCPU : public BaseCPU
|
|||
/** Per-Stage Instruction Tracing */
|
||||
bool stageTracing;
|
||||
|
||||
/** Is there a context switch pending? */
|
||||
bool contextSwitch;
|
||||
|
||||
/** Threads Scheduled to Enter CPU */
|
||||
std::list<int> cpuWaitList;
|
||||
|
||||
/** The cycle that the CPU was last running, used for statistics. */
|
||||
Tick lastRunningCycle;
|
||||
|
||||
/** Number of Virtual Processors the CPU can process */
|
||||
unsigned numVirtProcs;
|
||||
void updateContextSwitchStats();
|
||||
unsigned instsPerSwitch;
|
||||
Stats::Average instsPerCtxtSwitch;
|
||||
Stats::Scalar numCtxtSwitches;
|
||||
|
||||
/** Update Thread, used for statistic purposes */
|
||||
inline void tickThreadStats();
|
||||
|
@ -708,9 +731,15 @@ class InOrderCPU : public BaseCPU
|
|||
/** Stat for total number of times the CPU is descheduled. */
|
||||
Stats::Scalar timesIdled;
|
||||
|
||||
/** Stat for total number of cycles the CPU spends descheduled. */
|
||||
/** Stat for total number of cycles the CPU spends descheduled or no stages active. */
|
||||
Stats::Scalar idleCycles;
|
||||
|
||||
/** Stat for total number of cycles the CPU is active. */
|
||||
Stats::Scalar runCycles;
|
||||
|
||||
/** Percentage of cycles a stage was active */
|
||||
Stats::Formula activity;
|
||||
|
||||
/** Stat for the number of committed instructions per thread. */
|
||||
Stats::Vector committedInsts;
|
||||
|
||||
|
|
|
@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
|
|||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n");
|
||||
DPRINTF(InOrderStage, "Removing instructions from stage instruction "
|
||||
"list.\n");
|
||||
while (!insts[tid].empty()) {
|
||||
if (insts[tid].front()->seqNum <= squash_seq_num) {
|
||||
DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= "
|
||||
"squashing seqNum %i.\n",
|
||||
DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because "
|
||||
"it's <= squashing seqNum %i.\n",
|
||||
tid,
|
||||
insts[tid].front()->seqNum,
|
||||
squash_seq_num);
|
||||
|
@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
|
|||
insts[tid].size());
|
||||
break;
|
||||
}
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n",
|
||||
tid, insts[tid].front()->seqNum, insts[tid].front()->PC);
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] "
|
||||
"PC %08p.\n", tid, insts[tid].front()->seqNum,
|
||||
insts[tid].front()->PC);
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
|
@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
|
|||
cpu->removeInstsUntil(squash_seq_num, tid);
|
||||
}
|
||||
|
||||
void
|
||||
FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid)
|
||||
{
|
||||
// Need to preserve the stalling instruction in first-stage
|
||||
// since the squash() from first stage also removes
|
||||
// the instruction from the CPU (removeInstsUntil). If that
|
||||
// functionality gets changed then you can remove this offset.
|
||||
// (stalling instruction = seq_num + 1)
|
||||
squash(seq_num+1, tid);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FirstStage::processStage(bool &status_change)
|
||||
{
|
||||
|
@ -104,8 +118,9 @@ FirstStage::processStage(bool &status_change)
|
|||
status_change = checkSignalsAndUpdate(tid) || status_change;
|
||||
}
|
||||
|
||||
for (int threadFetched = 0; threadFetched < numFetchingThreads;
|
||||
threadFetched++) {
|
||||
for (int insts_fetched = 0;
|
||||
insts_fetched < stageWidth && canSendInstToStage(1);
|
||||
insts_fetched++) {
|
||||
ThreadID tid = getFetchingThread(fetchPolicy);
|
||||
|
||||
if (tid >= 0) {
|
||||
|
@ -115,16 +130,28 @@ FirstStage::processStage(bool &status_change)
|
|||
DPRINTF(InOrderStage, "No more threads to fetch from.\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (instsProcessed > 0) {
|
||||
++runCycles;
|
||||
idle = false;
|
||||
} else {
|
||||
++idleCycles;
|
||||
idle = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//@TODO: Note in documentation, that when you make a pipeline stage change, then
|
||||
//make sure you change the first stage too
|
||||
//@TODO: Note in documentation, that when you make a pipeline stage change,
|
||||
//then make sure you change the first stage too
|
||||
void
|
||||
FirstStage::processInsts(ThreadID tid)
|
||||
{
|
||||
bool all_reqs_completed = true;
|
||||
|
||||
for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) {
|
||||
for (int insts_fetched = 0;
|
||||
insts_fetched < stageWidth && canSendInstToStage(1);
|
||||
insts_fetched++) {
|
||||
|
||||
DynInstPtr inst;
|
||||
bool new_inst = false;
|
||||
|
||||
|
@ -150,26 +177,21 @@ FirstStage::processInsts(ThreadID tid)
|
|||
inst->traceData = NULL;
|
||||
#endif // TRACING_ON
|
||||
|
||||
DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n",
|
||||
inst->readTid(),
|
||||
inst->seqNum,
|
||||
0/*inst->curCount()*/);
|
||||
|
||||
// Add instruction to the CPU's list of instructions.
|
||||
inst->setInstListIt(cpu->addInst(inst));
|
||||
|
||||
DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n",
|
||||
inst->readTid(),
|
||||
inst->seqNum,
|
||||
0/*inst->curCount()*/);
|
||||
|
||||
// Create Front-End Resource Schedule For Instruction
|
||||
ThePipeline::createFrontEndSchedule(inst);
|
||||
}
|
||||
|
||||
// Don't let instruction pass to next stage if it hasn't completed
|
||||
// all of its requests for this stage.
|
||||
all_reqs_completed = processInstSchedule(inst);
|
||||
int reqs_processed = 0;
|
||||
all_reqs_completed = processInstSchedule(inst, reqs_processed);
|
||||
|
||||
// If the instruction isn't squashed & we've completed one request
|
||||
// Then we can officially count this instruction toward the stage's
|
||||
// bandwidth count
|
||||
if (reqs_processed > 0)
|
||||
instsProcessed++;
|
||||
|
||||
if (!all_reqs_completed) {
|
||||
if (new_inst) {
|
||||
|
@ -184,7 +206,6 @@ FirstStage::processInsts(ThreadID tid)
|
|||
}
|
||||
|
||||
sendInstToNextStage(inst);
|
||||
//++stageProcessedInsts;
|
||||
}
|
||||
|
||||
// Record that stage has written to the time buffer for activity
|
||||
|
@ -197,11 +218,12 @@ FirstStage::processInsts(ThreadID tid)
|
|||
ThreadID
|
||||
FirstStage::getFetchingThread(FetchPriority &fetch_priority)
|
||||
{
|
||||
if (numThreads > 1) {
|
||||
switch (fetch_priority) {
|
||||
ThreadID num_active_threads = cpu->numActiveThreads();
|
||||
|
||||
if (num_active_threads > 1) {
|
||||
switch (fetch_priority) {
|
||||
case SingleThread:
|
||||
return 0;
|
||||
return cpu->activeThreadId();
|
||||
|
||||
case RoundRobin:
|
||||
return roundRobin();
|
||||
|
@ -209,7 +231,7 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority)
|
|||
default:
|
||||
return InvalidThreadID;
|
||||
}
|
||||
} else {
|
||||
} else if (num_active_threads == 1) {
|
||||
ThreadID tid = *activeThreads->begin();
|
||||
|
||||
if (stageStatus[tid] == Running ||
|
||||
|
@ -218,8 +240,9 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority)
|
|||
} else {
|
||||
return InvalidThreadID;
|
||||
}
|
||||
} else {
|
||||
return InvalidThreadID;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ThreadID
|
||||
|
|
|
@ -61,6 +61,8 @@ class FirstStage : public PipelineStage {
|
|||
/** Squash Instructions Above a Seq. Num */
|
||||
void squash(InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
/** There are no insts. coming from previous stages, so there is
|
||||
* no need to sort insts here
|
||||
*/
|
||||
|
|
|
@ -111,6 +111,12 @@ InOrderDynInst::initVars()
|
|||
{
|
||||
fetchMemReq = NULL;
|
||||
dataMemReq = NULL;
|
||||
splitMemData = NULL;
|
||||
split2ndAddr = 0;
|
||||
split2ndAccess = false;
|
||||
splitInst = false;
|
||||
splitInstSked = false;
|
||||
splitFinishCnt = 0;
|
||||
|
||||
effAddr = 0;
|
||||
physEffAddr = 0;
|
||||
|
@ -159,7 +165,7 @@ InOrderDynInst::initVars()
|
|||
|
||||
// Update Instruction Count for this instruction
|
||||
++instcount;
|
||||
if (instcount > 500) {
|
||||
if (instcount > 100) {
|
||||
fatal("Number of Active Instructions in CPU is too high. "
|
||||
"(Not Dereferencing Ptrs. Correctly?)\n");
|
||||
}
|
||||
|
@ -170,6 +176,12 @@ InOrderDynInst::initVars()
|
|||
threadNumber, seqNum, instcount);
|
||||
}
|
||||
|
||||
void
|
||||
InOrderDynInst::resetInstCount()
|
||||
{
|
||||
instcount = 0;
|
||||
}
|
||||
|
||||
|
||||
InOrderDynInst::~InOrderDynInst()
|
||||
{
|
||||
|
@ -187,6 +199,10 @@ InOrderDynInst::~InOrderDynInst()
|
|||
delete traceData;
|
||||
}
|
||||
|
||||
if (splitMemData) {
|
||||
delete [] splitMemData;
|
||||
}
|
||||
|
||||
fault = NoFault;
|
||||
|
||||
--instcount;
|
||||
|
@ -583,30 +599,6 @@ InOrderDynInst::deallocateContext(int thread_num)
|
|||
this->cpu->deallocateContext(thread_num);
|
||||
}
|
||||
|
||||
void
|
||||
InOrderDynInst::enableVirtProcElement(unsigned vpe)
|
||||
{
|
||||
this->cpu->enableVirtProcElement(vpe);
|
||||
}
|
||||
|
||||
void
|
||||
InOrderDynInst::disableVirtProcElement(unsigned vpe)
|
||||
{
|
||||
this->cpu->disableVirtProcElement(threadNumber, vpe);
|
||||
}
|
||||
|
||||
void
|
||||
InOrderDynInst::enableMultiThreading(unsigned vpe)
|
||||
{
|
||||
this->cpu->enableMultiThreading(vpe);
|
||||
}
|
||||
|
||||
void
|
||||
InOrderDynInst::disableMultiThreading(unsigned vpe)
|
||||
{
|
||||
this->cpu->disableMultiThreading(threadNumber, vpe);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
inline Fault
|
||||
InOrderDynInst::read(Addr addr, T &data, unsigned flags)
|
||||
|
|
|
@ -164,6 +164,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
/// instructions ahead of it
|
||||
SerializeAfter, /// Needs to serialize instructions behind it
|
||||
SerializeHandled, /// Serialization has been handled
|
||||
RemoveList, /// Is Instruction on Remove List?
|
||||
NumStatus
|
||||
};
|
||||
|
||||
|
@ -330,6 +331,20 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
public:
|
||||
Tick memTime;
|
||||
|
||||
PacketDataPtr splitMemData;
|
||||
RequestPtr splitMemReq;
|
||||
int splitTotalSize;
|
||||
int split2ndSize;
|
||||
Addr split2ndAddr;
|
||||
bool split2ndAccess;
|
||||
uint8_t split2ndData;
|
||||
PacketDataPtr split2ndDataPtr;
|
||||
unsigned split2ndFlags;
|
||||
bool splitInst;
|
||||
int splitFinishCnt;
|
||||
uint64_t *split2ndStoreDataPtr;
|
||||
bool splitInstSked;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// BASE INSTRUCTION INFORMATION.
|
||||
|
@ -468,7 +483,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
if (!resSched.empty()) {
|
||||
ThePipeline::ScheduleEntry* sked = resSched.top();
|
||||
resSched.pop();
|
||||
if (sked != 0) {
|
||||
delete sked;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -515,12 +533,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
////////////////////////////////////////////////////////////
|
||||
virtual void deallocateContext(int thread_num);
|
||||
|
||||
virtual void enableVirtProcElement(unsigned vpe);
|
||||
virtual void disableVirtProcElement(unsigned vpe);
|
||||
|
||||
virtual void enableMultiThreading(unsigned vpe);
|
||||
virtual void disableMultiThreading(unsigned vpe);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// PROGRAM COUNTERS - PC/NPC/NNPC
|
||||
|
@ -905,6 +917,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
/** Returns whether or not the entry is on the CPU Reg Dep Map */
|
||||
bool isRegDepEntry() const { return status[RegDepMapEntry]; }
|
||||
|
||||
/** Marks this instruction as being on the remove list */
|
||||
void setRemoveList() { status.set(RemoveList); }
|
||||
|
||||
/** Returns whether or not this instruction is on the remove list */
|
||||
bool isRemoveList() const { return status[RemoveList]; }
|
||||
|
||||
/** Sets this instruction as completed. */
|
||||
void setCompleted() { status.set(Completed); }
|
||||
|
||||
|
@ -1022,13 +1040,14 @@ class InOrderDynInst : public FastAlloc, public RefCounted
|
|||
/** Count of total number of dynamic instructions. */
|
||||
static int instcount;
|
||||
|
||||
void resetInstCount();
|
||||
|
||||
/** Dumps out contents of this BaseDynInst. */
|
||||
void dump();
|
||||
|
||||
/** Dumps out contents of this BaseDynInst into given string. */
|
||||
void dump(std::string &outstring);
|
||||
|
||||
|
||||
//inline int curCount() { return curCount(); }
|
||||
};
|
||||
|
||||
|
|
|
@ -42,8 +42,11 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num)
|
|||
: stageNum(stage_num), stageWidth(ThePipeline::StageWidth),
|
||||
numThreads(ThePipeline::MaxThreads), _status(Inactive),
|
||||
stageBufferMax(ThePipeline::interStageBuffSize[stage_num]),
|
||||
prevStageValid(false), nextStageValid(false)
|
||||
prevStageValid(false), nextStageValid(false), idle(false)
|
||||
{
|
||||
switchedOutBuffer.resize(ThePipeline::MaxThreads);
|
||||
switchedOutValid.resize(ThePipeline::MaxThreads);
|
||||
|
||||
init(params);
|
||||
}
|
||||
|
||||
|
@ -76,34 +79,20 @@ PipelineStage::name() const
|
|||
void
|
||||
PipelineStage::regStats()
|
||||
{
|
||||
/* stageIdleCycles
|
||||
.name(name() + ".IdleCycles")
|
||||
.desc("Number of cycles stage is idle")
|
||||
.prereq(stageIdleCycles);
|
||||
stageBlockedCycles
|
||||
.name(name() + ".BlockedCycles")
|
||||
.desc("Number of cycles stage is blocked")
|
||||
.prereq(stageBlockedCycles);
|
||||
stageRunCycles
|
||||
.name(name() + ".RunCycles")
|
||||
.desc("Number of cycles stage is running")
|
||||
.prereq(stageRunCycles);
|
||||
stageUnblockCycles
|
||||
.name(name() + ".UnblockCycles")
|
||||
.desc("Number of cycles stage is unblocking")
|
||||
.prereq(stageUnblockCycles);
|
||||
stageSquashCycles
|
||||
.name(name() + ".SquashCycles")
|
||||
.desc("Number of cycles stage is squashing")
|
||||
.prereq(stageSquashCycles);
|
||||
stageProcessedInsts
|
||||
.name(name() + ".ProcessedInsts")
|
||||
.desc("Number of instructions handled by stage")
|
||||
.prereq(stageProcessedInsts);
|
||||
stageSquashedInsts
|
||||
.name(name() + ".SquashedInsts")
|
||||
.desc("Number of squashed instructions handled by stage")
|
||||
.prereq(stageSquashedInsts);*/
|
||||
idleCycles
|
||||
.name(name() + ".idleCycles")
|
||||
.desc("Number of cycles 0 instructions are processed.");
|
||||
|
||||
runCycles
|
||||
.name(name() + ".runCycles")
|
||||
.desc("Number of cycles 1+ instructions are processed.");
|
||||
|
||||
utilization
|
||||
.name(name() + ".utilization")
|
||||
.desc("Percentage of cycles stage was utilized (processing insts).")
|
||||
.precision(6);
|
||||
utilization = (runCycles / cpu->numCycles) * 100;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -112,8 +101,6 @@ PipelineStage::setCPU(InOrderCPU *cpu_ptr)
|
|||
{
|
||||
cpu = cpu_ptr;
|
||||
|
||||
dummyBufferInst = new InOrderDynInst(cpu_ptr, NULL, 0, 0, 0);
|
||||
|
||||
DPRINTF(InOrderStage, "Set CPU pointer.\n");
|
||||
|
||||
tracer = dynamic_cast<Trace::InOrderTrace *>(cpu->getTracer());
|
||||
|
@ -267,7 +254,8 @@ PipelineStage::isBlocked(ThreadID tid)
|
|||
bool
|
||||
PipelineStage::block(ThreadID tid)
|
||||
{
|
||||
DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to previous stages.\n", tid);
|
||||
DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to "
|
||||
"previous stages.\n", tid);
|
||||
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
|
@ -296,7 +284,8 @@ PipelineStage::block(ThreadID tid)
|
|||
void
|
||||
PipelineStage::blockDueToBuffer(ThreadID tid)
|
||||
{
|
||||
DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to next stage.\n", tid);
|
||||
DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to "
|
||||
"next stage.\n", tid);
|
||||
|
||||
if (stageStatus[tid] != Blocked) {
|
||||
// Set the status to Blocked.
|
||||
|
@ -334,8 +323,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
|
|||
{
|
||||
if (cpu->squashSeqNum[tid] < inst->seqNum &&
|
||||
cpu->lastSquashCycle[tid] == curTick){
|
||||
DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another stage's squash "
|
||||
"signal for after [sn:%i].\n", inst->seqNum, cpu->squashSeqNum[tid]);
|
||||
DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to "
|
||||
"another stage's squash signal for after [sn:%i].\n",
|
||||
inst->seqNum, cpu->squashSeqNum[tid]);
|
||||
} else {
|
||||
// Send back mispredict information.
|
||||
toPrevStages->stageInfo[stageNum][tid].branchMispredict = true;
|
||||
|
@ -346,20 +336,28 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
|
|||
|
||||
|
||||
#if ISA_HAS_DELAY_SLOT
|
||||
toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() !=
|
||||
toPrevStages->stageInfo[stageNum][tid].branchTaken =
|
||||
inst->readNextNPC() !=
|
||||
(inst->readNextPC() + sizeof(TheISA::MachInst));
|
||||
toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum;
|
||||
|
||||
toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum =
|
||||
inst->bdelaySeqNum;
|
||||
|
||||
InstSeqNum squash_seq_num = inst->bdelaySeqNum;
|
||||
#else
|
||||
toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() !=
|
||||
toPrevStages->stageInfo[stageNum][tid].branchTaken =
|
||||
inst->readNextPC() !=
|
||||
(inst->readPC() + sizeof(TheISA::MachInst));
|
||||
|
||||
toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum;
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
#endif
|
||||
|
||||
DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] "
|
||||
"branch.\n", tid, squash_seq_num, inst->seqNum);
|
||||
DPRINTF(InOrderStage, "Target being re-set to %08p\n",
|
||||
inst->readPredTarg());
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], "
|
||||
"due to [sn:%i] branch.\n", tid, squash_seq_num,
|
||||
inst->seqNum);
|
||||
|
||||
// Save squash num for later stage use
|
||||
cpu->squashSeqNum[tid] = squash_seq_num;
|
||||
|
@ -367,6 +365,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid)
|
||||
{
|
||||
squash(seq_num, tid);
|
||||
}
|
||||
|
||||
void
|
||||
PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid)
|
||||
{
|
||||
|
@ -376,12 +380,15 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid)
|
|||
for (int i=0; i < prevStage->size; i++) {
|
||||
if (prevStage->insts[i]->threadNumber == tid &&
|
||||
prevStage->insts[i]->seqNum > squash_seq_num) {
|
||||
// Change Comment to Annulling previous instruction
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Squashing instruction, "
|
||||
"[sn:%i] PC %08p.\n",
|
||||
tid,
|
||||
prevStage->insts[i]->seqNum,
|
||||
prevStage->insts[i]->readPC());
|
||||
prevStage->insts[i]->setSquashed();
|
||||
|
||||
prevStage->insts[i] = cpu->dummyBufferInst;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -394,18 +401,20 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
|
|||
|
||||
squashPrevStageInsts(squash_seq_num, tid);
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage skidbuffer.\n",
|
||||
tid);
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage"
|
||||
" skidbuffer.\n", tid);
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer "
|
||||
"instructions before delay slot [sn:%i]. %i insts"
|
||||
"left.\n", tid, squash_seq_num,
|
||||
"instructions (starting w/[sn:%i]) before delay slot "
|
||||
"[sn:%i]. %i insts left.\n", tid,
|
||||
skidBuffer[tid].front()->seqNum, squash_seq_num,
|
||||
skidBuffer[tid].size());
|
||||
break;
|
||||
}
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n",
|
||||
tid, skidBuffer[tid].front()->seqNum, skidBuffer[tid].front()->PC);
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] "
|
||||
" PC %08p.\n", tid, skidBuffer[tid].front()->seqNum,
|
||||
skidBuffer[tid].front()->PC);
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
|
@ -427,7 +436,8 @@ PipelineStage::stageBufferAvail()
|
|||
int avail = stageBufferMax - total -0;// incoming_insts;
|
||||
|
||||
if (avail < 0)
|
||||
fatal("stageNum %i:stageBufferAvail() < 0...stBMax=%i,total=%i,incoming=%i=>%i",
|
||||
fatal("stageNum %i:stageBufferAvail() < 0..."
|
||||
"stBMax=%i,total=%i,incoming=%i=>%i",
|
||||
stageNum, stageBufferMax, total, incoming_insts, avail);
|
||||
|
||||
return avail;
|
||||
|
@ -443,7 +453,8 @@ PipelineStage::canSendInstToStage(unsigned stage_num)
|
|||
}
|
||||
|
||||
if (!buffer_avail && nextStageQueueValid(stage_num)) {
|
||||
DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", stageNum + 1);
|
||||
DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n",
|
||||
stageNum + 1);
|
||||
}
|
||||
|
||||
return buffer_avail;
|
||||
|
@ -461,8 +472,9 @@ PipelineStage::skidInsert(ThreadID tid)
|
|||
|
||||
assert(tid == inst->threadNumber);
|
||||
|
||||
DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n",
|
||||
tid, inst->seqNum, inst->readPC(), inst->threadNumber);
|
||||
DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage "
|
||||
"skidBuffer %i\n", tid, inst->seqNum, inst->readPC(),
|
||||
inst->threadNumber);
|
||||
|
||||
skidBuffer[tid].push(inst);
|
||||
}
|
||||
|
@ -533,6 +545,39 @@ PipelineStage::updateStatus()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
PipelineStage::activateThread(ThreadID tid)
|
||||
{
|
||||
if (cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
|
||||
if (!switchedOutValid[tid]) {
|
||||
DPRINTF(InOrderStage, "[tid:%i] No instruction available in "
|
||||
"switch out buffer.\n", tid);
|
||||
} else {
|
||||
DynInstPtr inst = switchedOutBuffer[tid];
|
||||
|
||||
DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into "
|
||||
"stage skidBuffer %i\n", tid, inst->seqNum,
|
||||
inst->readPC(), inst->threadNumber);
|
||||
|
||||
// Make instruction available for pipeline processing
|
||||
skidBuffer[tid].push(inst);
|
||||
|
||||
// Update PC so that we start fetching after this instruction to prevent
|
||||
// "double"-execution of instructions
|
||||
cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)
|
||||
ResourcePool::UpdateAfterContextSwitch,
|
||||
inst, 0, 0, tid);
|
||||
|
||||
// Clear switchout buffer
|
||||
switchedOutBuffer[tid] = NULL;
|
||||
switchedOutValid[tid] = false;
|
||||
|
||||
// Update any CPU stats based off context switches
|
||||
cpu->updateContextSwitchStats();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
|
@ -547,16 +592,16 @@ PipelineStage::sortInsts()
|
|||
for (int i = 0; i < insts_from_prev_stage; ++i) {
|
||||
|
||||
if (prevStage->insts[i]->isSquashed()) {
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], not inserting "
|
||||
"into stage buffer.\n",
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], "
|
||||
"not inserting into stage buffer.\n",
|
||||
prevStage->insts[i]->readTid(),
|
||||
prevStage->insts[i]->seqNum);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage buffer.\n",
|
||||
prevStage->insts[i]->readTid(),
|
||||
DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage "
|
||||
"buffer.\n", prevStage->insts[i]->readTid(),
|
||||
prevStage->insts[i]->seqNum);
|
||||
|
||||
ThreadID tid = prevStage->insts[i]->threadNumber;
|
||||
|
@ -565,7 +610,7 @@ PipelineStage::sortInsts()
|
|||
|
||||
skidBuffer[tid].push(prevStage->insts[i]);
|
||||
|
||||
prevStage->insts[i] = dummyBufferInst;
|
||||
prevStage->insts[i] = cpu->dummyBufferInst;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -611,8 +656,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
|
|||
// Check for squash from later pipeline stages
|
||||
for (int stage_idx=stageNum; stage_idx < NumStages; stage_idx++) {
|
||||
if (fromNextStages->stageInfo[stage_idx][tid].squash) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to squash "
|
||||
"from stage %u.\n", tid, stage_idx);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to "
|
||||
"squash from stage %u.\n", tid, stage_idx);
|
||||
InstSeqNum squash_seq_num = fromNextStages->
|
||||
stageInfo[stage_idx][tid].bdelayDoneSeqNum;
|
||||
squash(squash_seq_num, tid);
|
||||
|
@ -625,8 +670,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
|
|||
}
|
||||
|
||||
if (stageStatus[tid] == Blocked) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to unblocking.\n",
|
||||
tid);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to "
|
||||
"unblocking.\n", tid);
|
||||
|
||||
stageStatus[tid] = Unblocking;
|
||||
|
||||
|
@ -637,15 +682,15 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
|
|||
|
||||
if (stageStatus[tid] == Squashing) {
|
||||
if (!skidBuffer[tid].empty()) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to unblocking.\n",
|
||||
tid);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to "
|
||||
"unblocking.\n", tid);
|
||||
|
||||
stageStatus[tid] = Unblocking;
|
||||
} else {
|
||||
// Switch status to running if stage isn't being told to block or
|
||||
// squash this cycle.
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to running.\n",
|
||||
tid);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to "
|
||||
"running.\n", tid);
|
||||
|
||||
stageStatus[tid] = Running;
|
||||
}
|
||||
|
@ -663,6 +708,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
|
|||
void
|
||||
PipelineStage::tick()
|
||||
{
|
||||
idle = false;
|
||||
|
||||
wroteToTimeBuffer = false;
|
||||
|
||||
bool status_change = false;
|
||||
|
@ -674,6 +721,8 @@ PipelineStage::tick()
|
|||
|
||||
sortInsts();
|
||||
|
||||
instsProcessed = 0;
|
||||
|
||||
processStage(status_change);
|
||||
|
||||
if (status_change) {
|
||||
|
@ -717,13 +766,13 @@ PipelineStage::unsetResStall(ResReqPtr res_req, ThreadID tid)
|
|||
}
|
||||
|
||||
if (stalls[tid].resources.size() == 0) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource stalls.\n",
|
||||
tid);
|
||||
// Adjacent string literals are concatenated verbatim, so the first piece
// must end with a space or the message reads "resourcestalls".
DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource "
        "stalls.\n", tid);
|
||||
}
|
||||
}
|
||||
|
||||
// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be handled
|
||||
// one at a time, but instead first come first serve by instruction?
|
||||
// @TODO: Update How we handled threads in CPU. Maybe threads shouldn't be
|
||||
// handled one at a time, but instead first come first serve by instruction?
|
||||
// Questions are how should a pipeline stage handle thread-specific stalls &
|
||||
// pipeline squashes
|
||||
void
|
||||
|
@ -746,30 +795,32 @@ PipelineStage::processStage(bool &status_change)
|
|||
nextStage->size, stageNum + 1);
|
||||
}
|
||||
|
||||
if (instsProcessed > 0) {
|
||||
++runCycles;
|
||||
idle = false;
|
||||
} else {
|
||||
++idleCycles;
|
||||
idle = true;
|
||||
}
|
||||
|
||||
DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(),
|
||||
stageNum);
|
||||
|
||||
DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", stageBufferAvail(),
|
||||
stageNum);
|
||||
DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n",
|
||||
stageBufferAvail(), stageNum);
|
||||
}
|
||||
|
||||
void
|
||||
PipelineStage::processThread(bool &status_change, ThreadID tid)
|
||||
{
|
||||
// If status is Running or idle,
|
||||
// call stageInsts()
|
||||
// call processInsts()
|
||||
// If status is Unblocking,
|
||||
// buffer any instructions coming from fetch
|
||||
// continue trying to empty skid buffer
|
||||
// check if stall conditions have passed
|
||||
|
||||
if (stageStatus[tid] == Blocked) {
|
||||
;//++stageBlockedCycles;
|
||||
} else if (stageStatus[tid] == Squashing) {
|
||||
;//++stageSquashCycles;
|
||||
}
|
||||
|
||||
// Stage should try to stage as many instructions as its bandwidth
|
||||
// Stage should try to process as many instructions as its bandwidth
|
||||
// will allow, as long as it is not currently blocked.
|
||||
if (stageStatus[tid] == Running ||
|
||||
stageStatus[tid] == Idle) {
|
||||
|
@ -810,26 +861,22 @@ PipelineStage::processInsts(ThreadID tid)
|
|||
if (insts_available == 0) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Nothing to do, breaking out"
|
||||
" early.\n",tid);
|
||||
// Should I change the status to idle?
|
||||
//++stageIdleCycles;
|
||||
return;
|
||||
}
|
||||
|
||||
DynInstPtr inst;
|
||||
bool last_req_completed = true;
|
||||
|
||||
int insts_processed = 0;
|
||||
|
||||
while (insts_available > 0 &&
|
||||
insts_processed < stageWidth &&
|
||||
instsProcessed < stageWidth &&
|
||||
(!nextStageValid || canSendInstToStage(stageNum+1)) &&
|
||||
last_req_completed) {
|
||||
assert(!insts_to_stage.empty());
|
||||
|
||||
inst = insts_to_stage.front();
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] with "
|
||||
"PC %#x\n",
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] "
|
||||
"with PC %#x\n",
|
||||
tid, inst->seqNum, inst->readPC());
|
||||
|
||||
if (inst->isSquashed()) {
|
||||
|
@ -837,8 +884,6 @@ PipelineStage::processInsts(ThreadID tid)
|
|||
"squashed, skipping.\n",
|
||||
tid, inst->seqNum, inst->readPC());
|
||||
|
||||
//++stageSquashedInsts;
|
||||
|
||||
insts_to_stage.pop();
|
||||
|
||||
--insts_available;
|
||||
|
@ -846,8 +891,14 @@ PipelineStage::processInsts(ThreadID tid)
|
|||
continue;
|
||||
}
|
||||
|
||||
int reqs_processed = 0;
|
||||
last_req_completed = processInstSchedule(inst, reqs_processed);
|
||||
|
||||
last_req_completed = processInstSchedule(inst);
|
||||
// If the instruction isn't squashed & we've completed one request
|
||||
// Then we can officially count this instruction toward the stage's
|
||||
// bandwidth count
|
||||
if (reqs_processed > 0)
|
||||
instsProcessed++;
|
||||
|
||||
// Don't let instruction pass to next stage if it hasn't completed
|
||||
// all of its requests for this stage.
|
||||
|
@ -856,16 +907,13 @@ PipelineStage::processInsts(ThreadID tid)
|
|||
|
||||
// Send to Next Stage or Break Loop
|
||||
if (nextStageValid && !sendInstToNextStage(inst)) {
|
||||
DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage %i.\n",
|
||||
tid, inst->seqNum,inst->nextStage);
|
||||
DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage"
|
||||
" %i.\n", tid, inst->seqNum,inst->nextStage);
|
||||
break;
|
||||
}
|
||||
|
||||
insts_processed++;
|
||||
|
||||
insts_to_stage.pop();
|
||||
|
||||
//++stageProcessedInsts;
|
||||
--insts_available;
|
||||
}
|
||||
|
||||
|
@ -883,12 +931,10 @@ PipelineStage::processInsts(ThreadID tid)
|
|||
}
|
||||
|
||||
bool
|
||||
PipelineStage::processInstSchedule(DynInstPtr inst)
|
||||
PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed)
|
||||
{
|
||||
bool last_req_completed = true;
|
||||
#if TRACING_ON
|
||||
ThreadID tid = inst->readTid();
|
||||
#endif
|
||||
|
||||
if (inst->nextResStage() == stageNum) {
|
||||
int res_stage_num = inst->nextResStage();
|
||||
|
@ -897,14 +943,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst)
|
|||
int res_num = inst->nextResource();
|
||||
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s.\n",
|
||||
tid, inst->seqNum, cpu->resPool->name(res_num));
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s."
|
||||
"\n", tid, inst->seqNum, cpu->resPool->name(res_num));
|
||||
|
||||
ResReqPtr req = cpu->resPool->request(res_num, inst);
|
||||
|
||||
if (req->isCompleted()) {
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s completed.\n",
|
||||
tid, inst->seqNum, cpu->resPool->name(res_num));
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s "
|
||||
"completed.\n", tid, inst->seqNum,
|
||||
cpu->resPool->name(res_num));
|
||||
|
||||
if (req->fault == NoFault) {
|
||||
inst->popSchedEntry();
|
||||
|
@ -912,12 +959,58 @@ PipelineStage::processInstSchedule(DynInstPtr inst)
|
|||
panic("%i: encountered %s fault!\n",
|
||||
curTick, req->fault->name());
|
||||
}
|
||||
|
||||
reqs_processed++;
|
||||
|
||||
req->stagePasses++;
|
||||
} else {
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed.\n",
|
||||
tid, inst->seqNum, cpu->resPool->name(res_num));
|
||||
DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed."
|
||||
"\n", tid, inst->seqNum, cpu->resPool->name(res_num));
|
||||
|
||||
last_req_completed = false;
|
||||
|
||||
if (req->isMemStall() &&
|
||||
cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
|
||||
// Save Stalling Instruction
|
||||
DPRINTF(ThreadModel, "[tid:%i] [sn:%i] Detected cache miss.\n", tid, inst->seqNum);
|
||||
|
||||
DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
switchedOutBuffer[tid] = inst;
|
||||
switchedOutValid[tid] = true;
|
||||
|
||||
// Remove Thread From Pipeline & Resource Pool
|
||||
inst->squashingStage = stageNum;
|
||||
inst->bdelaySeqNum = inst->seqNum;
|
||||
cpu->squashFromMemStall(inst, tid);
|
||||
|
||||
// Switch On Cache Miss
|
||||
//=====================
|
||||
// Suspend Thread at end of cycle
|
||||
DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid);
|
||||
cpu->suspendContext(tid);
|
||||
|
||||
// Activate Next Ready Thread at end of cycle
|
||||
DPRINTF(ThreadModel, "Attempting to activate next ready thread due to"
|
||||
" cache miss.\n");
|
||||
cpu->activateNextReadyContext();
|
||||
}
|
||||
|
||||
// Mark request for deletion
|
||||
// if it isn't currently being used by a resource
|
||||
if (!req->hasSlot()) {
|
||||
DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no slot in resource.\n",
|
||||
inst->seqNum);
|
||||
|
||||
cpu->reqRemoveList.push(req);
|
||||
} else {
|
||||
DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, in resource [slot:%i].\n",
|
||||
inst->seqNum, req->getSlot());
|
||||
//req = cpu->dummyReq[tid];
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -956,12 +1049,12 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
|
|||
assert(next_stage >= 1);
|
||||
assert(prev_stage >= 0);
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to stage %u.\n", tid,
|
||||
stageNum+1);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to "
|
||||
"stage %u.\n", tid, stageNum+1);
|
||||
|
||||
if (!canSendInstToStage(inst->nextStage)) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to stage %u.\n", tid,
|
||||
stageNum+1);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to "
|
||||
"stage %u.\n", tid, stageNum+1);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -969,12 +1062,14 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
|
|||
if (nextStageQueueValid(inst->nextStage - 1)) {
|
||||
if (inst->seqNum > cpu->squashSeqNum[tid] &&
|
||||
curTick == cpu->lastSquashCycle[tid]) {
|
||||
DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping insertion "
|
||||
"into stage %i queue.\n", tid, inst->seqNum, inst->nextStage);
|
||||
DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping "
|
||||
"insertion into stage %i queue.\n", tid, inst->seqNum,
|
||||
inst->nextStage);
|
||||
} else {
|
||||
if (nextStageValid) {
|
||||
DPRINTF(InOrderStage, "[tid:%u] %i slots available in next stage buffer.\n",
|
||||
tid, cpu->pipelineStage[next_stage]->stageBufferAvail());
|
||||
DPRINTF(InOrderStage, "[tid:%u] %i slots available in next "
|
||||
"stage buffer.\n", tid,
|
||||
cpu->pipelineStage[next_stage]->stageBufferAvail());
|
||||
}
|
||||
|
||||
DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into "
|
||||
|
@ -982,11 +1077,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
|
|||
tid, inst->seqNum, toNextStageIndex,
|
||||
cpu->pipelineStage[prev_stage]->nextStageQueue->id());
|
||||
|
||||
int next_stage_idx = cpu->pipelineStage[prev_stage]->nextStage->size;
|
||||
int next_stage_idx =
|
||||
cpu->pipelineStage[prev_stage]->nextStage->size;
|
||||
|
||||
// Place instructions in inter-stage communication struct for the next
|
||||
// Place instructions in inter-stage communication struct for next
|
||||
// pipeline stage to read next cycle
|
||||
cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] = inst;
|
||||
cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx]
|
||||
= inst;
|
||||
|
||||
++(cpu->pipelineStage[prev_stage]->nextStage->size);
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ class PipelineStage
|
|||
virtual void processInsts(ThreadID tid);
|
||||
|
||||
/** Process all resources on an instruction's resource schedule */
|
||||
virtual bool processInstSchedule(DynInstPtr inst);
|
||||
virtual bool processInstSchedule(DynInstPtr inst, int &reqs_processed);
|
||||
|
||||
/** Is there room in the next stage buffer for this instruction? */
|
||||
virtual bool canSendInstToStage(unsigned stage_num);
|
||||
|
@ -235,11 +235,15 @@ class PipelineStage
|
|||
|
||||
|
||||
public:
|
||||
virtual void activateThread(ThreadID tid);
|
||||
|
||||
/** Squashes if there is a PC-relative branch that was predicted
|
||||
* incorrectly. Sends squash information back to fetch.
|
||||
*/
|
||||
virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
/** Squash instructions from stage buffer */
|
||||
virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
|
@ -259,19 +263,33 @@ class PipelineStage
|
|||
/** List of active thread ids */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Buffer of instructions switched out to mem-stall.
|
||||
* Only used when using SwitchOnCacheMiss threading model
|
||||
* Used as 1-to-1 mapping between ThreadID and Entry.
|
||||
*/
|
||||
std::vector<DynInstPtr> switchedOutBuffer;
|
||||
std::vector<bool> switchedOutValid;
|
||||
|
||||
/** Instructions that we've processed this tick
|
||||
* NOTE: "Processed" means completed at least 1 instruction request
|
||||
*/
|
||||
unsigned instsProcessed;
|
||||
|
||||
/** Queue of all instructions coming from previous stage on this cycle. */
|
||||
std::queue<DynInstPtr> insts[ThePipeline::MaxThreads];
|
||||
|
||||
/** Queue of instructions that are finished processing and ready to go next stage.
|
||||
* This is used to prevent from processing an instrution more than once on any
|
||||
* stage. NOTE: It is up to the PROGRAMMER must manage this as a queue
|
||||
/** Queue of instructions that are finished processing and ready to go
|
||||
* next stage. This is used to prevent processing an instruction more
|
||||
* than once on any stage. NOTE: It is up to the PROGRAMMER to manage
|
||||
* this as a queue
|
||||
*/
|
||||
std::list<DynInstPtr> instsToNextStage;
|
||||
|
||||
/** Skid buffer between previous stage and this one. */
|
||||
std::queue<DynInstPtr> skidBuffer[ThePipeline::MaxThreads];
|
||||
|
||||
/** Instruction used to signify that there is no *real* instruction in buffer slot */
|
||||
/** Instruction used to signify that there is no *real* instruction in
|
||||
* buffer slot */
|
||||
DynInstPtr dummyBufferInst;
|
||||
|
||||
/** SeqNum of Squashing Branch Delay Instruction (used for MIPS) */
|
||||
|
@ -329,30 +347,27 @@ class PipelineStage
|
|||
/** Is Next Stage Valid? */
|
||||
bool nextStageValid;
|
||||
|
||||
bool idle;
|
||||
|
||||
/** Source of possible stalls. */
|
||||
// Per-thread stall bookkeeping (one instance per thread in stalls[]).
struct Stalls {
|
||||
// One flag per pipeline stage (array sized by ThePipeline::NumStages);
// presumably set while that stage requests a stall -- confirm with users.
bool stage[ThePipeline::NumStages];
|
||||
// Resource requests currently stalling the thread; unsetResStall()
// reports "no remaining resource stalls" once this is empty.
std::vector<ResReqPtr> resources;
|
||||
};
|
||||
|
||||
/** Tracks which stages are telling decode to stall. */
|
||||
/** Tracks stage/resource stalls */
|
||||
Stalls stalls[ThePipeline::MaxThreads];
|
||||
|
||||
//@TODO: Use Stats for the pipeline stages
|
||||
/** Stat for total number of idle cycles. */
|
||||
//Stats::Scalar stageIdleCycles;
|
||||
/** Stat for total number of blocked cycles. */
|
||||
//Stats::Scalar stageBlockedCycles;
|
||||
/** Stat for total number of normal running cycles. */
|
||||
//Stats::Scalar stageRunCycles;
|
||||
/** Stat for total number of unblocking cycles. */
|
||||
//Stats::Scalar stageUnblockCycles;
|
||||
/** Stat for total number of squashing cycles. */
|
||||
//Stats::Scalar stageSquashCycles;
|
||||
/** Stat for total number of staged instructions. */
|
||||
//Stats::Scalar stageProcessedInsts;
|
||||
/** Stat for total number of squashed instructions. */
|
||||
//Stats::Scalar stageSquashedInsts;
|
||||
/** Number of cycles 0 instruction(s) are processed. */
|
||||
Stats::Scalar idleCycles;
|
||||
|
||||
/** Number of cycles 1+ instructions are processed. */
|
||||
Stats::Scalar runCycles;
|
||||
|
||||
/** Percentage of cycles 1+ instructions are processed. */
|
||||
Stats::Formula utilization;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -65,16 +65,18 @@ int getNextPriority(DynInstPtr &inst, int stage_num)
|
|||
|
||||
void createFrontEndSchedule(DynInstPtr &inst)
|
||||
{
|
||||
InstStage *I = inst->addStage();
|
||||
InstStage *E = inst->addStage();
|
||||
InstStage *F = inst->addStage();
|
||||
InstStage *D = inst->addStage();
|
||||
|
||||
I->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
|
||||
I->needs(ICache, CacheUnit::InitiateFetch);
|
||||
// FETCH
|
||||
F->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
|
||||
F->needs(ICache, CacheUnit::InitiateFetch);
|
||||
|
||||
E->needs(ICache, CacheUnit::CompleteFetch);
|
||||
E->needs(Decode, DecodeUnit::DecodeInst);
|
||||
E->needs(BPred, BranchPredictor::PredictBranch);
|
||||
E->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
|
||||
// DECODE
|
||||
D->needs(ICache, CacheUnit::CompleteFetch);
|
||||
D->needs(Decode, DecodeUnit::DecodeInst);
|
||||
D->needs(BPred, BranchPredictor::PredictBranch);
|
||||
D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
|
||||
}
|
||||
|
||||
bool createBackEndSchedule(DynInstPtr &inst)
|
||||
|
@ -83,45 +85,48 @@ bool createBackEndSchedule(DynInstPtr &inst)
|
|||
return false;
|
||||
}
|
||||
|
||||
InstStage *E = inst->currentStage();
|
||||
InstStage *X = inst->addStage();
|
||||
InstStage *M = inst->addStage();
|
||||
InstStage *A = inst->addStage();
|
||||
InstStage *W = inst->addStage();
|
||||
|
||||
// EXECUTE
|
||||
for (int idx=0; idx < inst->numSrcRegs(); idx++) {
|
||||
if (!idx || !inst->isStore()) {
|
||||
E->needs(RegManager, UseDefUnit::ReadSrcReg, idx);
|
||||
X->needs(RegManager, UseDefUnit::ReadSrcReg, idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( inst->isNonSpeculative() ) {
|
||||
// skip execution of non speculative insts until later
|
||||
} else if ( inst->isMemRef() ) {
|
||||
if ( inst->isLoad() ) {
|
||||
E->needs(AGEN, AGENUnit::GenerateAddr);
|
||||
E->needs(DCache, CacheUnit::InitiateReadData);
|
||||
X->needs(AGEN, AGENUnit::GenerateAddr);
|
||||
}
|
||||
} else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
|
||||
E->needs(MDU, MultDivUnit::StartMultDiv);
|
||||
X->needs(MDU, MultDivUnit::StartMultDiv);
|
||||
} else {
|
||||
E->needs(ExecUnit, ExecutionUnit::ExecuteInst);
|
||||
X->needs(ExecUnit, ExecutionUnit::ExecuteInst);
|
||||
}
|
||||
|
||||
if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
|
||||
M->needs(MDU, MultDivUnit::EndMultDiv);
|
||||
X->needs(MDU, MultDivUnit::EndMultDiv);
|
||||
}
|
||||
|
||||
// MEMORY
|
||||
if ( inst->isLoad() ) {
|
||||
M->needs(DCache, CacheUnit::CompleteReadData);
|
||||
M->needs(DCache, CacheUnit::InitiateReadData);
|
||||
} else if ( inst->isStore() ) {
|
||||
M->needs(RegManager, UseDefUnit::ReadSrcReg, 1);
|
||||
M->needs(AGEN, AGENUnit::GenerateAddr);
|
||||
M->needs(DCache, CacheUnit::InitiateWriteData);
|
||||
}
|
||||
|
||||
if ( inst->isStore() ) {
|
||||
A->needs(DCache, CacheUnit::CompleteWriteData);
|
||||
|
||||
// WRITEBACK
|
||||
if ( inst->isLoad() ) {
|
||||
W->needs(DCache, CacheUnit::CompleteReadData);
|
||||
} else if ( inst->isStore() ) {
|
||||
W->needs(DCache, CacheUnit::CompleteWriteData);
|
||||
}
|
||||
|
||||
if ( inst->isNonSpeculative() ) {
|
||||
|
|
|
@ -53,8 +53,8 @@ namespace ThePipeline {
|
|||
const unsigned StageWidth = 1;
|
||||
const unsigned BackEndStartStage = 2;
|
||||
|
||||
// Enumerated List of Resources The Pipeline Uses
|
||||
enum ResourceList {
|
||||
// List of Resources The Pipeline Uses
|
||||
enum ResourceId {
|
||||
FetchSeq = 0,
|
||||
ICache,
|
||||
Decode,
|
||||
|
@ -94,6 +94,7 @@ namespace ThePipeline {
|
|||
stageNum(stage_num), resNum(res_num), cmd(_cmd),
|
||||
idx(_idx), priority(_priority)
|
||||
{ }
|
||||
|
||||
virtual ~ScheduleEntry(){}
|
||||
|
||||
// Stage number to perform this service.
|
||||
|
@ -113,7 +114,8 @@ namespace ThePipeline {
|
|||
};
|
||||
|
||||
struct entryCompare {
|
||||
bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) const
|
||||
bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs)
|
||||
const
|
||||
{
|
||||
// Prioritize first by stage number that the resource is needed
|
||||
if (lhs->stageNum > rhs->stageNum) {
|
||||
|
@ -158,7 +160,6 @@ namespace ThePipeline {
|
|||
stageNum, nextTaskPriority++, unit, request, param
|
||||
));
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -235,3 +235,27 @@ RegDepMap::findBypassInst(unsigned idx)
|
|||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
RegDepMap::dump()
|
||||
{
|
||||
|
||||
for (int idx=0; idx < regMap.size(); idx++) {
|
||||
|
||||
if (regMap[idx].size() > 0) {
|
||||
cprintf("Reg #%i (size:%i): ", idx, regMap[idx].size());
|
||||
|
||||
std::list<DynInstPtr>::iterator list_it = regMap[idx].begin();
|
||||
std::list<DynInstPtr>::iterator list_end = regMap[idx].end();
|
||||
|
||||
while (list_it != list_end) {
|
||||
cprintf("[sn:%i] ", (*list_it)->seqNum);
|
||||
|
||||
list_it++;
|
||||
}
|
||||
|
||||
cprintf("\n");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,6 +88,8 @@ class RegDepMap
|
|||
/** Size of Dependency of Map */
|
||||
int depSize(unsigned idx);
|
||||
|
||||
void dump();
|
||||
|
||||
protected:
|
||||
// Eventually make this a map of lists for
|
||||
// efficiency sake!
|
||||
|
|
|
@ -47,6 +47,7 @@ Resource::Resource(string res_name, int res_id, int res_width,
|
|||
/** Destructor: releases the resourceEvent array and the deniedReq object. */
Resource::~Resource()
|
||||
{
|
||||
// resourceEvent is released with the array form of delete
delete [] resourceEvent;
|
||||
delete deniedReq;
|
||||
}
|
||||
|
||||
|
||||
|
@ -80,7 +81,9 @@ Resource::regStats()
|
|||
{
|
||||
instReqsProcessed
|
||||
.name(name() + ".instReqsProcessed")
|
||||
.desc("Number of Instructions Requests that completed in this resource.");
|
||||
.desc("Number of Instructions Requests that completed in "
|
||||
"this resource.")
|
||||
.prereq(instReqsProcessed);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -98,11 +101,6 @@ Resource::slotsInUse()
|
|||
void
|
||||
Resource::freeSlot(int slot_idx)
|
||||
{
|
||||
DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource [slot:%i].\n",
|
||||
reqMap[slot_idx]->inst->readTid(),
|
||||
reqMap[slot_idx]->inst->seqNum,
|
||||
slot_idx);
|
||||
|
||||
// Put slot number on this resource's free list
|
||||
availSlots.push_back(slot_idx);
|
||||
|
||||
|
@ -159,7 +157,8 @@ Resource::getSlot(DynInstPtr inst)
|
|||
|
||||
while (map_it != map_end) {
|
||||
if ((*map_it).second) {
|
||||
DPRINTF(Resource, "Currently Serving request from: [tid:%i] [sn:%i].\n",
|
||||
DPRINTF(Resource, "Currently Serving request from: "
|
||||
"[tid:%i] [sn:%i].\n",
|
||||
(*map_it).second->getInst()->readTid(),
|
||||
(*map_it).second->getInst()->seqNum);
|
||||
}
|
||||
|
@ -176,7 +175,7 @@ Resource::request(DynInstPtr inst)
|
|||
// See if the resource is already serving this instruction.
|
||||
// If so, use that request;
|
||||
bool try_request = false;
|
||||
int slot_num;
|
||||
int slot_num = -1;
|
||||
int stage_num;
|
||||
ResReqPtr inst_req = findRequest(inst);
|
||||
|
||||
|
@ -202,10 +201,12 @@ Resource::request(DynInstPtr inst)
|
|||
inst_req = getRequest(inst, stage_num, id, slot_num, cmd);
|
||||
|
||||
if (inst->staticInst) {
|
||||
DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource.\n",
|
||||
DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this "
|
||||
"resource.\n",
|
||||
inst->readTid(), inst->seqNum);
|
||||
} else {
|
||||
DPRINTF(Resource, "[tid:%i]: instruction requesting this resource.\n",
|
||||
DPRINTF(Resource, "[tid:%i]: instruction requesting this "
|
||||
"resource.\n",
|
||||
inst->readTid());
|
||||
}
|
||||
|
||||
|
@ -232,7 +233,8 @@ Resource::requestAgain(DynInstPtr inst, bool &do_request)
|
|||
do_request = true;
|
||||
|
||||
if (inst->staticInst) {
|
||||
DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource again.\n",
|
||||
DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource "
|
||||
"again.\n",
|
||||
inst->readTid(), inst->seqNum);
|
||||
} else {
|
||||
DPRINTF(Resource, "[tid:%i]: requesting this resource again.\n",
|
||||
|
@ -254,15 +256,22 @@ Resource::findRequest(DynInstPtr inst)
|
|||
map<int, ResReqPtr>::iterator map_it = reqMap.begin();
|
||||
map<int, ResReqPtr>::iterator map_end = reqMap.end();
|
||||
|
||||
bool found = false;
|
||||
ResReqPtr req = NULL;
|
||||
|
||||
while (map_it != map_end) {
|
||||
if ((*map_it).second &&
|
||||
(*map_it).second->getInst() == inst) {
|
||||
return (*map_it).second;
|
||||
req = (*map_it).second;
|
||||
//return (*map_it).second;
|
||||
assert(found == false);
|
||||
found = true;
|
||||
}
|
||||
map_it++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return req;
|
||||
//return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -334,6 +343,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Memory-stall squash entry point for a resource.
 * Currently a plain pass-through: all four arguments are handed unchanged
 * to squash().
 */
void
|
||||
Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
|
||||
ThreadID tid)
|
||||
{
|
||||
squash(inst, stage_num, squash_seq_num, tid);
|
||||
}
|
||||
|
||||
Tick
|
||||
Resource::ticks(int num_cycles)
|
||||
|
@ -394,22 +409,72 @@ Resource::unscheduleEvent(DynInstPtr inst)
|
|||
|
||||
int ResourceRequest::resReqID = 0;
|
||||
|
||||
int ResourceRequest::resReqCount = 0;
|
||||
int ResourceRequest::maxReqCount = 0;
|
||||
|
||||
/**
 * Build a request from instruction @p _inst to resource @p _res.
 *
 * The arguments are stored as given (stage number, resource index, slot
 * number, command); all status flags start out false, the stage-pass
 * counter starts at 0 and no completion slot is recorded (-1).
 *
 * In DEBUG builds the request also receives a unique ID and takes part in
 * the CPU-wide count of live requests, which is fatal above 100 and is
 * tracked for its maximum.
 */
ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst,
                                 int stage_num, int res_idx, int slot_num,
                                 unsigned _cmd)
    : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num),
      resIdx(res_idx), slotNum(slot_num), completed(false),
      squashed(false), processing(false), memStall(false)
{
    // Nothing processed yet, no completion slot recorded
    stagePasses = 0;
    complSlotNum = -1;

#ifdef DEBUG
    reqID = resReqID++;
    res->cpu->resReqCount++;
    DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID,
            res->cpu->resReqCount);

    // A large number of live requests means they are not being deleted
    if (res->cpu->resReqCount > 100) {
        fatal("Too many undeleted resource requests. Memory leak?\n");
    }

    // Track the high-water mark of simultaneously live requests
    if (maxReqCount < res->cpu->resReqCount) {
        maxReqCount = res->cpu->resReqCount;
        res->cpu->maxResReqCount = maxReqCount;
    }
#endif
}
|
||||
|
||||
/**
 * Destroy the request. In DEBUG builds this also removes it from the
 * CPU's live-request counter and logs the new count.
 */
ResourceRequest::~ResourceRequest()
{
#ifdef DEBUG
    // One fewer live request on this CPU.
    --res->cpu->resReqCount;

    DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID,
            res->cpu->resReqCount);
#endif
}
|
||||
|
||||
void
|
||||
ResourceRequest::done(bool completed)
|
||||
{
|
||||
DPRINTF(Resource, "%s done with request from [sn:%i] [tid:%i].\n",
|
||||
res->name(), inst->seqNum, inst->readTid());
|
||||
DPRINTF(Resource, "%s [slot:%i] done with request from [sn:%i] [tid:%i].\n",
|
||||
res->name(), slotNum, inst->seqNum, inst->readTid());
|
||||
|
||||
setCompleted(completed);
|
||||
|
||||
// Add to remove list
|
||||
// Used for debugging purposes
|
||||
if (completed) {
|
||||
complSlotNum = slotNum;
|
||||
|
||||
// Would like to start a convention such as all requests deleted in resources/pipeline
|
||||
// but a little more complex than it seems...
|
||||
// For now, all COMPLETED requests deleted in resource..
|
||||
// all FAILED requests deleted in pipeline stage
|
||||
// *all SQUASHED requests deleted in resource
|
||||
res->cpu->reqRemoveList.push(res->reqMap[slotNum]);
|
||||
}
|
||||
|
||||
// Free Slot So Another Instruction Can Use This Resource
|
||||
res->freeSlot(slotNum);
|
||||
|
||||
// change slot # to -1, since we check slotNum to see if request is still valid
|
||||
slotNum = -1;
|
||||
|
||||
res->instReqsProcessed++;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,8 @@ class Resource {
|
|||
/** Define this function if resource, has a port to connect to an outside
|
||||
* simulation object.
|
||||
*/
|
||||
virtual Port* getPort(const std::string &if_name, int idx) { return NULL; }
|
||||
virtual Port* getPort(const std::string &if_name, int idx)
|
||||
{ return NULL; }
|
||||
|
||||
/** Return ID for this resource */
|
||||
int getId() { return id; }
|
||||
|
@ -92,6 +93,14 @@ class Resource {
|
|||
*/
|
||||
virtual void deactivateThread(ThreadID tid);
|
||||
|
||||
/** Resources that care about thread suspension override this. */
|
||||
virtual void suspendThread(ThreadID tid) { }
|
||||
|
||||
/** Will be called the cycle before a context switch. Any bookkeeping
|
||||
* that needs to be kept for that, can be done here
|
||||
*/
|
||||
virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { }
|
||||
|
||||
/** Resources that care when an instruction has been graduated
|
||||
* can override this
|
||||
*/
|
||||
|
@ -114,9 +123,9 @@ class Resource {
|
|||
/** Free a resource slot */
|
||||
virtual void freeSlot(int slot_idx);
|
||||
|
||||
/** Request usage of a resource for this instruction. If this instruction already
|
||||
* has made this request to this resource, and that request is uncompleted
|
||||
* this function will just return that request
|
||||
/** Request usage of a resource for this instruction. If this instruction
|
||||
* already has made this request to this resource, and that request is
|
||||
* uncompleted this function will just return that request
|
||||
*/
|
||||
virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
|
||||
int res_idx, int slot_num,
|
||||
|
@ -155,6 +164,9 @@ class Resource {
|
|||
virtual void squash(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
virtual void squashDueToMemStall(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
/** The number of instructions available that this resource can
|
||||
* still process
|
||||
*/
|
||||
|
@ -166,7 +178,8 @@ class Resource {
|
|||
/** Schedule resource event, regardless of its current state. */
|
||||
void scheduleEvent(int slot_idx, int delay);
|
||||
|
||||
/** Find instruction in list, Schedule resource event, regardless of its current state. */
|
||||
/** Find instruction in list, Schedule resource event, regardless of its
|
||||
* current state. */
|
||||
bool scheduleEvent(DynInstPtr inst, int delay);
|
||||
|
||||
/** Unschedule resource event, regardless of its current state. */
|
||||
|
@ -303,29 +316,13 @@ class ResourceRequest
|
|||
|
||||
static int resReqID;
|
||||
|
||||
static int resReqCount;
|
||||
static int maxReqCount;
|
||||
|
||||
public:
|
||||
ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num,
|
||||
int res_idx, int slot_num, unsigned _cmd)
|
||||
: res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num),
|
||||
resIdx(res_idx), slotNum(slot_num), completed(false),
|
||||
squashed(false), processing(false), waiting(false)
|
||||
{
|
||||
reqID = resReqID++;
|
||||
resReqCount++;
|
||||
DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, resReqCount);
|
||||
int res_idx, int slot_num, unsigned _cmd);
|
||||
|
||||
if (resReqCount > 100) {
|
||||
fatal("Too many undeleted resource requests. Memory leak?\n");
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~ResourceRequest()
|
||||
{
|
||||
resReqCount--;
|
||||
DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, resReqCount);
|
||||
}
|
||||
virtual ~ResourceRequest();
|
||||
|
||||
int reqID;
|
||||
|
||||
|
@ -334,6 +331,8 @@ class ResourceRequest
|
|||
*/
|
||||
void done(bool completed = true);
|
||||
|
||||
short stagePasses;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
//
|
||||
// GET RESOURCE REQUEST IDENTIFICATION / INFO
|
||||
|
@ -342,8 +341,11 @@ class ResourceRequest
|
|||
/** Get Resource Index */
|
||||
int getResIdx() { return resIdx; }
|
||||
|
||||
|
||||
/** Get Slot Number */
|
||||
int getSlot() { return slotNum; }
|
||||
int getComplSlot() { return complSlotNum; }
|
||||
bool hasSlot() { return slotNum >= 0; }
|
||||
|
||||
/** Get Stage Number */
|
||||
int getStageNum() { return stageNum; }
|
||||
|
@ -366,6 +368,9 @@ class ResourceRequest
|
|||
/** Instruction being used */
|
||||
DynInstPtr inst;
|
||||
|
||||
/** Not guaranteed to be set, used for debugging */
|
||||
InstSeqNum seqNum;
|
||||
|
||||
/** Fault Associated With This Resource Request */
|
||||
Fault fault;
|
||||
|
||||
|
@ -390,8 +395,8 @@ class ResourceRequest
|
|||
void setProcessing() { processing = true; }
|
||||
|
||||
/** Get/Set IsWaiting variables */
|
||||
bool isWaiting() { return waiting; }
|
||||
void setWaiting() { waiting = true; }
|
||||
bool isMemStall() { return memStall; }
|
||||
void setMemStall(bool stall = true) { memStall = stall; }
|
||||
|
||||
protected:
|
||||
/** Resource Identification */
|
||||
|
@ -399,12 +404,14 @@ class ResourceRequest
|
|||
int stageNum;
|
||||
int resIdx;
|
||||
int slotNum;
|
||||
int complSlotNum;
|
||||
|
||||
/** Resource Status */
|
||||
/** Resource Request Status */
|
||||
bool completed;
|
||||
bool squashed;
|
||||
bool processing;
|
||||
bool waiting;
|
||||
|
||||
bool memStall;
|
||||
};
|
||||
|
||||
#endif //__CPU_INORDER_RESOURCE_HH__
|
||||
|
|
|
@ -41,45 +41,74 @@ using namespace ThePipeline;
|
|||
ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
|
||||
: cpu(_cpu)
|
||||
{
|
||||
//@todo: use this function to instantiate the resources in resource pool. This will help in the
|
||||
//auto-generation of this pipeline model.
|
||||
//@todo: use this function to instantiate the resources in resource pool.
|
||||
//This will help in the auto-generation of this pipeline model.
|
||||
//ThePipeline::addResources(resources, memObjects);
|
||||
|
||||
// Declare Resource Objects
|
||||
// name - id - bandwidth - latency - CPU - Parameters
|
||||
// --------------------------------------------------
|
||||
resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params));
|
||||
resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq,
|
||||
StageWidth * 2, 0, _cpu, params));
|
||||
|
||||
memObjects.push_back(ICache);
|
||||
resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params));
|
||||
resources.push_back(new CacheUnit("icache_port", ICache,
|
||||
StageWidth * MaxThreads, 0, _cpu,
|
||||
params));
|
||||
|
||||
resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params));
|
||||
resources.push_back(new DecodeUnit("Decode-Unit", Decode,
|
||||
StageWidth, 0, _cpu, params));
|
||||
|
||||
resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params));
|
||||
resources.push_back(new BranchPredictor("Branch-Predictor", BPred,
|
||||
StageWidth, 0, _cpu, params));
|
||||
|
||||
resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params));
|
||||
resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4,
|
||||
0, _cpu, params));
|
||||
|
||||
resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params));
|
||||
resources.push_back(new UseDefUnit("RegFile-Manager", RegManager,
|
||||
StageWidth * MaxThreads, 0, _cpu,
|
||||
params));
|
||||
|
||||
resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params));
|
||||
resources.push_back(new AGENUnit("AGEN-Unit", AGEN,
|
||||
StageWidth, 0, _cpu, params));
|
||||
|
||||
resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params));
|
||||
resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit,
|
||||
StageWidth, 0, _cpu, params));
|
||||
|
||||
resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params));
|
||||
resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu,
|
||||
params));
|
||||
|
||||
memObjects.push_back(DCache);
|
||||
resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params));
|
||||
resources.push_back(new CacheUnit("dcache_port", DCache,
|
||||
StageWidth * MaxThreads, 0, _cpu,
|
||||
params));
|
||||
|
||||
resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params));
|
||||
resources.push_back(new GraduationUnit("Graduation-Unit", Grad,
|
||||
StageWidth * MaxThreads, 0, _cpu,
|
||||
params));
|
||||
|
||||
resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params));
|
||||
resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4,
|
||||
0, _cpu, params));
|
||||
}
|
||||
|
||||
/**
 * Tear down the pool: announce the deletion on stdout, then delete every
 * resource in the order it was added.
 */
ResourcePool::~ResourcePool()
{
    cout << "Deleting resources ..." << endl;

    // Cache the count once, as the other pool-wide loops in this file do.
    const int num_resources = resources.size();

    for (int idx = 0; idx < num_resources; idx++) {
        DPRINTF(Resource, "Deleting resource: %s.\n",
                resources[idx]->name());

        delete resources[idx];
    }
}
|
||||
|
||||
|
||||
void
|
||||
ResourcePool::init()
|
||||
{
|
||||
for (int i=0; i < resources.size(); i++) {
|
||||
DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name());
|
||||
DPRINTF(Resource, "Initializing resource: %s.\n",
|
||||
resources[i]->name());
|
||||
|
||||
resources[i]->init();
|
||||
}
|
||||
|
@ -113,8 +142,8 @@ ResourcePool::getPort(const std::string &if_name, int idx)
|
|||
int obj_idx = memObjects[i];
|
||||
Port *port = resources[obj_idx]->getPort(if_name, idx);
|
||||
if (port != NULL) {
|
||||
DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name,
|
||||
resources[obj_idx]->name(), obj_idx);
|
||||
DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n",
|
||||
if_name, resources[obj_idx]->name(), obj_idx);
|
||||
return port;
|
||||
}
|
||||
}
|
||||
|
@ -131,7 +160,8 @@ ResourcePool::getPortIdx(const std::string &port_name)
|
|||
unsigned obj_idx = memObjects[i];
|
||||
Port *port = resources[obj_idx]->getPort(port_name, obj_idx);
|
||||
if (port != NULL) {
|
||||
DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name);
|
||||
DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx,
|
||||
port_name);
|
||||
return obj_idx;
|
||||
}
|
||||
}
|
||||
|
@ -151,6 +181,25 @@ ResourcePool::getResIdx(const std::string &res_name)
|
|||
return idx;
|
||||
}
|
||||
|
||||
panic("Can't find resource idx for: %s\n", res_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
ResourcePool::getResIdx(const ThePipeline::ResourceId &res_id)
|
||||
{
|
||||
int num_resources = resources.size();
|
||||
|
||||
for (int idx = 0; idx < num_resources; idx++) {
|
||||
if (resources[idx]->getId() == res_id)
|
||||
return idx;
|
||||
}
|
||||
|
||||
// todo: change return value to int and return a -1 here
|
||||
// maybe even have enumerated type
|
||||
// panic for now...
|
||||
panic("Can't find resource idx for: %i\n", res_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -167,7 +216,8 @@ void
|
|||
ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num,
|
||||
ThreadID tid)
|
||||
{
|
||||
resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid);
|
||||
resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num,
|
||||
tid);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -182,6 +232,12 @@ ResourcePool::slotsInUse(int res_idx)
|
|||
return resources[res_idx]->slotsInUse();
|
||||
}
|
||||
|
||||
//@todo: split this function and call this version schedulePoolEvent
|
||||
// and use this scheduleEvent for scheduling a specific event on
|
||||
// a resource
|
||||
//@todo: For arguments that aren't being used in a ResPoolEvent, a dummyParam
|
||||
// or some typedef can be used to signify what's important info
|
||||
// to the event construction
|
||||
void
|
||||
ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
|
||||
int delay, int res_idx, ThreadID tid)
|
||||
|
@ -192,23 +248,45 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
|
|||
{
|
||||
case InOrderCPU::ActivateThread:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n",
|
||||
curTick + delay);
|
||||
ResPoolEvent *res_pool_event = new ResPoolEvent(this,
|
||||
DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event "
|
||||
"for tick %i, [tid:%i].\n", curTick + delay,
|
||||
inst->readTid());
|
||||
ResPoolEvent *res_pool_event =
|
||||
new ResPoolEvent(this,
|
||||
e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->bdelaySeqNum,
|
||||
inst->readTid());
|
||||
mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
|
||||
mainEventQueue.schedule(res_pool_event,
|
||||
curTick + cpu->ticks(delay));
|
||||
}
|
||||
break;
|
||||
|
||||
case InOrderCPU::HaltThread:
|
||||
case InOrderCPU::DeactivateThread:
|
||||
{
|
||||
|
||||
DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool "
|
||||
"Event for tick %i.\n", curTick + delay);
|
||||
ResPoolEvent *res_pool_event =
|
||||
new ResPoolEvent(this,
|
||||
e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->bdelaySeqNum,
|
||||
tid);
|
||||
|
||||
mainEventQueue.schedule(res_pool_event,
|
||||
curTick + cpu->ticks(delay));
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
case InOrderCPU::SuspendThread:
|
||||
case InOrderCPU::DeallocateThread:
|
||||
{
|
||||
|
||||
DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n",
|
||||
DPRINTF(Resource, "Scheduling Suspend Thread Resource Pool Event for tick %i.\n",
|
||||
curTick + delay);
|
||||
ResPoolEvent *res_pool_event = new ResPoolEvent(this,
|
||||
e_type,
|
||||
|
@ -224,7 +302,54 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
|
|||
|
||||
case ResourcePool::InstGraduated:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n",
|
||||
DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool "
|
||||
"Event for tick %i.\n", curTick + delay);
|
||||
ResPoolEvent *res_pool_event =
|
||||
new ResPoolEvent(this,e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->seqNum,
|
||||
inst->readTid());
|
||||
mainEventQueue.schedule(res_pool_event,
|
||||
curTick + cpu->ticks(delay));
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
case ResourcePool::SquashAll:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling Squash Resource Pool Event for "
|
||||
"tick %i.\n", curTick + delay);
|
||||
ResPoolEvent *res_pool_event =
|
||||
new ResPoolEvent(this,e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->bdelaySeqNum,
|
||||
inst->readTid());
|
||||
mainEventQueue.schedule(res_pool_event,
|
||||
curTick + cpu->ticks(delay));
|
||||
}
|
||||
break;
|
||||
|
||||
case InOrderCPU::SquashFromMemStall:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource "
|
||||
"Pool Event for tick %i.\n",
|
||||
curTick + delay);
|
||||
ResPoolEvent *res_pool_event =
|
||||
new ResPoolEvent(this,e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->seqNum - 1,
|
||||
inst->readTid());
|
||||
mainEventQueue.schedule(res_pool_event,
|
||||
curTick + cpu->ticks(delay));
|
||||
}
|
||||
break;
|
||||
|
||||
case ResourcePool::UpdateAfterContextSwitch:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling UpdatePC Resource Pool Event for tick %i.\n",
|
||||
curTick + delay);
|
||||
ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type,
|
||||
inst,
|
||||
|
@ -236,22 +361,9 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
|
|||
}
|
||||
break;
|
||||
|
||||
case ResourcePool::SquashAll:
|
||||
{
|
||||
DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n",
|
||||
curTick + delay);
|
||||
ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type,
|
||||
inst,
|
||||
inst->squashingStage,
|
||||
inst->bdelaySeqNum,
|
||||
inst->readTid());
|
||||
mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]);
|
||||
; // If Resource Pool doesnt recognize event, we ignore it.
|
||||
DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n",
|
||||
InOrderCPU::eventNames[e_type]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -265,8 +377,9 @@ void
|
|||
ResourcePool::squashAll(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum done_seq_num, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n",
|
||||
stage_num, tid, done_seq_num);
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Squash All Event "
|
||||
" starting w/stage %i for all instructions above [sn:%i].\n",
|
||||
tid, stage_num, done_seq_num);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
|
@ -275,24 +388,50 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum done_seq_num, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting SquashDueToMemStall Event"
|
||||
" starting w/stage %i for all instructions above [sn:%i].\n",
|
||||
tid, stage_num, done_seq_num);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
for (int idx = 0; idx < num_resources; idx++) {
|
||||
resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num,
|
||||
tid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ResourcePool::activateAll(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n",
|
||||
tid);
|
||||
bool do_activate = cpu->threadModel != InOrderCPU::SwitchOnCacheMiss ||
|
||||
cpu->numActiveThreads() < 1 ||
|
||||
cpu->activeThreadId() == tid;
|
||||
|
||||
|
||||
if (do_activate) {
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all "
|
||||
"resources.\n", tid);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
for (int idx = 0; idx < num_resources; idx++) {
|
||||
resources[idx]->activateThread(tid);
|
||||
}
|
||||
} else {
|
||||
DPRINTF(Resource, "[tid:%i] Ignoring Thread Activation to all "
|
||||
"resources.\n", tid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ResourcePool::deactivateAll(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n",
|
||||
tid);
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all "
|
||||
"resources.\n", tid);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
|
@ -301,11 +440,24 @@ ResourcePool::deactivateAll(ThreadID tid)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
ResourcePool::suspendAll(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Thread Suspension to all resources.\n",
|
||||
tid);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
for (int idx = 0; idx < num_resources; idx++) {
|
||||
resources[idx]->suspendThread(tid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n",
|
||||
tid, seq_num);
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all "
|
||||
"resources.\n", tid, seq_num);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
|
@ -314,10 +466,18 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid)
|
|||
}
|
||||
}
|
||||
|
||||
ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool)
|
||||
: Event(CPU_Tick_Pri), resPool(_resPool),
|
||||
eventType((InOrderCPU::CPUEventType) Default)
|
||||
{ }
|
||||
void
|
||||
ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Resource, "[tid:%i] Broadcasting Update PC to all resources.\n",
|
||||
tid);
|
||||
|
||||
int num_resources = resources.size();
|
||||
|
||||
for (int idx = 0; idx < num_resources; idx++) {
|
||||
resources[idx]->updateAfterContextSwitch(inst, tid);
|
||||
}
|
||||
}
|
||||
|
||||
ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool,
|
||||
InOrderCPU::CPUEventType e_type,
|
||||
|
@ -340,11 +500,15 @@ ResourcePool::ResPoolEvent::process()
|
|||
resPool->activateAll(tid);
|
||||
break;
|
||||
|
||||
case InOrderCPU::SuspendThread:
|
||||
case InOrderCPU::DeallocateThread:
|
||||
case InOrderCPU::DeactivateThread:
|
||||
case InOrderCPU::HaltThread:
|
||||
resPool->deactivateAll(tid);
|
||||
break;
|
||||
|
||||
case InOrderCPU::SuspendThread:
|
||||
resPool->suspendAll(tid);
|
||||
break;
|
||||
|
||||
case ResourcePool::InstGraduated:
|
||||
resPool->instGraduated(seqNum, tid);
|
||||
break;
|
||||
|
@ -353,6 +517,14 @@ ResourcePool::ResPoolEvent::process()
|
|||
resPool->squashAll(inst, stageNum, seqNum, tid);
|
||||
break;
|
||||
|
||||
case InOrderCPU::SquashFromMemStall:
|
||||
resPool->squashDueToMemStall(inst, stageNum, seqNum, tid);
|
||||
break;
|
||||
|
||||
case ResourcePool::UpdateAfterContextSwitch:
|
||||
resPool->updateAfterContextSwitch(inst, tid);
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("Unrecognized Event Type");
|
||||
}
|
||||
|
|
|
@ -63,6 +63,7 @@ class ResourcePool {
|
|||
enum ResPoolEventType {
|
||||
InstGraduated = InOrderCPU::NumCPUEvents,
|
||||
SquashAll,
|
||||
UpdateAfterContextSwitch,
|
||||
Default
|
||||
};
|
||||
|
||||
|
@ -84,9 +85,6 @@ class ResourcePool {
|
|||
ThreadID tid;
|
||||
|
||||
public:
|
||||
/** Constructs a resource event. */
|
||||
ResPoolEvent(ResourcePool *_resPool);
|
||||
|
||||
/** Constructs a resource event. */
|
||||
ResPoolEvent(ResourcePool *_resPool,
|
||||
InOrderCPU::CPUEventType e_type,
|
||||
|
@ -124,7 +122,7 @@ class ResourcePool {
|
|||
|
||||
public:
|
||||
ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params);
|
||||
virtual ~ResourcePool() {}
|
||||
~ResourcePool();
|
||||
|
||||
std::string name();
|
||||
|
||||
|
@ -143,6 +141,7 @@ class ResourcePool {
|
|||
|
||||
/** Returns a specific resource. */
|
||||
unsigned getResIdx(const std::string &res_name);
|
||||
unsigned getResIdx(const ThePipeline::ResourceId &res_id);
|
||||
|
||||
/** Returns a pointer to a resource */
|
||||
Resource* getResource(int res_idx) { return resources[res_idx]; }
|
||||
|
@ -160,12 +159,24 @@ class ResourcePool {
|
|||
void squashAll(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum done_seq_num, ThreadID tid);
|
||||
|
||||
/** Squash Resources in Pool after a memory stall
|
||||
* NOTE: Only use during Switch-On-Miss Thread model
|
||||
*/
|
||||
void squashDueToMemStall(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum done_seq_num, ThreadID tid);
|
||||
|
||||
/** Activate Thread in all resources */
|
||||
void activateAll(ThreadID tid);
|
||||
|
||||
/** De-Activate Thread in all resources */
|
||||
void deactivateAll(ThreadID tid);
|
||||
|
||||
/** Suspend Thread in all resources */
|
||||
void suspendAll(ThreadID tid);
|
||||
|
||||
/** Broadcast Context Switch Update to all resources */
|
||||
void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);
|
||||
|
||||
/** Broadcast graduation to all resources */
|
||||
void instGraduated(InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "cpu/inorder/resources/cache_unit.hh"
|
||||
#include "cpu/inorder/pipeline_traits.hh"
|
||||
#include "cpu/inorder/cpu.hh"
|
||||
#include "cpu/inorder/resource_pool.hh"
|
||||
#include "mem/request.hh"
|
||||
|
||||
using namespace std;
|
||||
|
@ -49,14 +50,14 @@ using namespace ThePipeline;
|
|||
Tick
|
||||
CacheUnit::CachePort::recvAtomic(PacketPtr pkt)
|
||||
{
|
||||
panic("DefaultFetch doesn't expect recvAtomic callback!");
|
||||
panic("CacheUnit::CachePort doesn't expect recvAtomic callback!");
|
||||
return curTick;
|
||||
}
|
||||
|
||||
void
|
||||
CacheUnit::CachePort::recvFunctional(PacketPtr pkt)
|
||||
{
|
||||
panic("DefaultFetch doesn't expect recvFunctional callback!");
|
||||
panic("CacheUnit::CachePort doesn't expect recvFunctional callback!");
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -65,7 +66,7 @@ CacheUnit::CachePort::recvStatusChange(Status status)
|
|||
if (status == RangeChange)
|
||||
return;
|
||||
|
||||
panic("DefaultFetch doesn't expect recvStatusChange callback!");
|
||||
panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!");
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -84,8 +85,7 @@ CacheUnit::CachePort::recvRetry()
|
|||
CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
|
||||
int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
|
||||
: Resource(res_name, res_id, res_width, res_latency, _cpu),
|
||||
retryPkt(NULL), retrySlot(-1), cacheBlocked(false),
|
||||
predecoder(NULL)
|
||||
cachePortBlocked(false), predecoder(NULL)
|
||||
{
|
||||
cachePort = new CachePort(this);
|
||||
|
||||
|
@ -131,18 +131,24 @@ CacheUnit::init()
|
|||
int
|
||||
CacheUnit::getSlot(DynInstPtr inst)
|
||||
{
|
||||
ThreadID tid = inst->readTid();
|
||||
|
||||
if (tlbBlocked[inst->threadNumber]) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!inst->validMemAddr()) {
|
||||
panic("Mem. Addr. must be set before requesting cache access\n");
|
||||
// For a Split-Load, the instruction would have processed once already
|
||||
// causing the address to be unset.
|
||||
if (!inst->validMemAddr() && !inst->splitInst) {
|
||||
panic("[tid:%i][sn:%i] Mem. Addr. must be set before requesting cache access\n",
|
||||
inst->readTid(), inst->seqNum);
|
||||
}
|
||||
|
||||
Addr req_addr = inst->getMemAddr();
|
||||
|
||||
if (resName == "icache_port" ||
|
||||
find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) {
|
||||
find(addrList[tid].begin(), addrList[tid].end(), req_addr) ==
|
||||
addrList[tid].end()) {
|
||||
|
||||
int new_slot = Resource::getSlot(inst);
|
||||
|
||||
|
@ -150,37 +156,115 @@ CacheUnit::getSlot(DynInstPtr inst)
|
|||
return -1;
|
||||
|
||||
inst->memTime = curTick;
|
||||
addrList.push_back(req_addr);
|
||||
addrMap[req_addr] = inst->seqNum;
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
|
||||
inst->readTid(), inst->seqNum, req_addr);
|
||||
setAddrDependency(inst);
|
||||
return new_slot;
|
||||
} else {
|
||||
// Allow same instruction multiple accesses to same address
|
||||
// should only happen maybe after a squashed inst. needs to replay
|
||||
if (addrMap[tid][req_addr] == inst->seqNum) {
|
||||
int new_slot = Resource::getSlot(inst);
|
||||
|
||||
if (new_slot == -1)
|
||||
return -1;
|
||||
|
||||
return new_slot;
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort,
|
||||
"Denying request because there is an outstanding"
|
||||
"[tid:%i] Denying request because there is an outstanding"
|
||||
" request to/for addr. %08p. by [sn:%i] @ tick %i\n",
|
||||
req_addr, addrMap[req_addr], inst->memTime);
|
||||
inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
CacheUnit::freeSlot(int slot_num)
|
||||
CacheUnit::setAddrDependency(DynInstPtr inst)
|
||||
{
|
||||
vector<Addr>::iterator vect_it = find(addrList.begin(), addrList.end(),
|
||||
reqMap[slot_num]->inst->getMemAddr());
|
||||
assert(vect_it != addrList.end());
|
||||
Addr req_addr = inst->getMemAddr();
|
||||
ThreadID tid = inst->readTid();
|
||||
|
||||
addrList[tid].push_back(req_addr);
|
||||
addrMap[tid][req_addr] = inst->seqNum;
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: Address %08p removed from dependency list\n",
|
||||
reqMap[slot_num]->inst->readTid(), (*vect_it));
|
||||
|
||||
addrList.erase(vect_it);
|
||||
|
||||
Resource::freeSlot(slot_num);
|
||||
"[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
|
||||
inst->readTid(), inst->seqNum, req_addr);
|
||||
DPRINTF(AddrDep,
|
||||
"[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
|
||||
inst->readTid(), inst->seqNum, req_addr);
|
||||
}
|
||||
|
||||
void
|
||||
CacheUnit::removeAddrDependency(DynInstPtr inst)
|
||||
{
|
||||
ThreadID tid = inst->readTid();
|
||||
|
||||
Addr mem_addr = inst->getMemAddr();
|
||||
|
||||
// Erase from Address List
|
||||
vector<Addr>::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(),
|
||||
mem_addr);
|
||||
assert(vect_it != addrList[tid].end() || inst->splitInst);
|
||||
|
||||
if (vect_it != addrList[tid].end()) {
|
||||
DPRINTF(AddrDep,
|
||||
"[tid:%i]: [sn:%i] Address %08p removed from dependency list\n",
|
||||
inst->readTid(), inst->seqNum, (*vect_it));
|
||||
|
||||
addrList[tid].erase(vect_it);
|
||||
|
||||
// Erase From Address Map (Used for Debugging)
|
||||
addrMap[tid].erase(addrMap[tid].find(mem_addr));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
 * Find the outstanding cache request that belongs to inst and whose
 * instIdx equals the idx of the entry on top of the instruction's
 * resource schedule.
 *
 * @return The matching request, or NULL if there is none.
 */
ResReqPtr
CacheUnit::findRequest(DynInstPtr inst)
{
    for (map<int, ResReqPtr>::iterator req_it = reqMap.begin();
         req_it != reqMap.end(); ++req_it) {
        // Every entry in this unit's request map should be a CacheRequest.
        CacheRequest *cache_req = dynamic_cast<CacheRequest*>(req_it->second);
        assert(cache_req);

        // Skip entries that are not for this instruction. The schedule is
        // consulted only after the instruction has matched.
        if (!cache_req || !(cache_req->getInst() == inst))
            continue;

        if (cache_req->instIdx == inst->resSched.top()->idx)
            return cache_req;
    }

    return NULL;
}
|
||||
|
||||
/**
 * Find the outstanding cache request that belongs to inst and whose
 * instIdx equals idx.
 *
 * @param inst Instruction that issued the request.
 * @param idx  Value to match against the request's instIdx.
 * @return The matching request, or NULL if there is none.
 */
ResReqPtr
CacheUnit::findSplitRequest(DynInstPtr inst, int idx)
{
    for (map<int, ResReqPtr>::iterator req_it = reqMap.begin();
         req_it != reqMap.end(); ++req_it) {
        // Every entry in this unit's request map should be a CacheRequest.
        CacheRequest *cache_req = dynamic_cast<CacheRequest*>(req_it->second);
        assert(cache_req);

        // Skip entries that are not for this instruction.
        if (!cache_req || !(cache_req->getInst() == inst))
            continue;

        if (cache_req->instIdx == idx)
            return cache_req;
    }

    return NULL;
}
|
||||
|
||||
|
||||
ResReqPtr
|
||||
CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
|
||||
int slot_num, unsigned cmd)
|
||||
|
@ -195,6 +279,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
|
|||
|
||||
switch (sched_entry->cmd)
|
||||
{
|
||||
case InitSecondSplitRead:
|
||||
pkt_cmd = MemCmd::ReadReq;
|
||||
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: Read request from [sn:%i] for addr %08p\n",
|
||||
inst->readTid(), inst->seqNum, inst->split2ndAddr);
|
||||
break;
|
||||
|
||||
case InitiateReadData:
|
||||
pkt_cmd = MemCmd::ReadReq;
|
||||
|
||||
|
@ -203,6 +295,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
|
|||
inst->readTid(), inst->seqNum, inst->getMemAddr());
|
||||
break;
|
||||
|
||||
case InitSecondSplitWrite:
|
||||
pkt_cmd = MemCmd::WriteReq;
|
||||
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: Write request from [sn:%i] for addr %08p\n",
|
||||
inst->readTid(), inst->seqNum, inst->split2ndAddr);
|
||||
break;
|
||||
|
||||
case InitiateWriteData:
|
||||
pkt_cmd = MemCmd::WriteReq;
|
||||
|
||||
|
@ -226,7 +326,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
|
|||
|
||||
return new CacheRequest(this, inst, stage_num, id, slot_num,
|
||||
sched_entry->cmd, 0, pkt_cmd,
|
||||
0/*flags*/, this->cpu->readCpuId());
|
||||
0/*flags*/, this->cpu->readCpuId(),
|
||||
inst->resSched.top()->idx);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -237,15 +338,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
|
|||
|
||||
// Check to see if this instruction is requesting the same command
|
||||
// or a different one
|
||||
if (cache_req->cmd != inst->resSched.top()->cmd) {
|
||||
if (cache_req->cmd != inst->resSched.top()->cmd &&
|
||||
cache_req->instIdx == inst->resSched.top()->idx) {
|
||||
// If different, then update command in the request
|
||||
cache_req->cmd = inst->resSched.top()->cmd;
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: [sn:%i]: Updating the command for this instruction\n",
|
||||
inst->readTid(), inst->seqNum);
|
||||
"[tid:%i]: [sn:%i]: Updating the command for this "
|
||||
"instruction\n ", inst->readTid(), inst->seqNum);
|
||||
|
||||
service_request = true;
|
||||
} else {
|
||||
} else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead &&
|
||||
inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) {
|
||||
// If same command, just check to see if memory access was completed
|
||||
// but dont try to re-execute
|
||||
DPRINTF(InOrderCachePort,
|
||||
|
@ -271,10 +374,23 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
|
|||
cpu->readCpuId(), inst->readTid());
|
||||
cache_req->memReq = inst->fetchMemReq;
|
||||
} else {
|
||||
inst->dataMemReq = new Request(inst->readTid(), aligned_addr,
|
||||
if (!cache_req->is2ndSplit()) {
|
||||
inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr,
|
||||
acc_size, flags, inst->readPC(),
|
||||
cpu->readCpuId(), inst->readTid());
|
||||
cache_req->memReq = inst->dataMemReq;
|
||||
} else {
|
||||
assert(inst->splitInst);
|
||||
|
||||
inst->splitMemReq = new Request(cpu->asid[tid],
|
||||
inst->split2ndAddr,
|
||||
acc_size,
|
||||
flags,
|
||||
inst->readPC(),
|
||||
cpu->readCpuId(),
|
||||
tid);
|
||||
cache_req->memReq = inst->splitMemReq;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -311,14 +427,93 @@ Fault
|
|||
CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
|
||||
{
|
||||
CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
|
||||
assert(cache_req);
|
||||
assert(cache_req && "Can't Find Instruction for Read!");
|
||||
|
||||
int acc_size = sizeof(T);
|
||||
doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read);
|
||||
// The block size of our peer
|
||||
unsigned blockSize = this->cachePort->peerBlockSize();
|
||||
|
||||
//The size of the data we're trying to read.
|
||||
int dataSize = sizeof(T);
|
||||
|
||||
if (inst->split2ndAccess) {
|
||||
dataSize = inst->split2ndSize;
|
||||
cache_req->splitAccess = true;
|
||||
cache_req->split2ndAccess = true;
|
||||
|
||||
DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum,
|
||||
inst->getMemAddr(), inst->split2ndAddr);
|
||||
}
|
||||
|
||||
|
||||
//The address of the second part of this access if it needs to be split
|
||||
//across a cache line boundary.
|
||||
Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
|
||||
|
||||
|
||||
if (secondAddr > addr && !inst->split2ndAccess) {
|
||||
DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum,
|
||||
addr, secondAddr);
|
||||
|
||||
// Save All "Total" Split Information
|
||||
// ==============================
|
||||
inst->splitInst = true;
|
||||
inst->splitMemData = new uint8_t[dataSize];
|
||||
inst->splitTotalSize = dataSize;
|
||||
|
||||
if (!inst->splitInstSked) {
|
||||
// Schedule Split Read/Complete for Instruction
|
||||
// ==============================
|
||||
int stage_num = cache_req->getStageNum();
|
||||
|
||||
int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
|
||||
|
||||
inst->resSched.push(new ScheduleEntry(stage_num,
|
||||
stage_pri,
|
||||
cpu->resPool->getResIdx(DCache),
|
||||
CacheUnit::InitSecondSplitRead,
|
||||
1)
|
||||
);
|
||||
|
||||
inst->resSched.push(new ScheduleEntry(stage_num + 1,
|
||||
1/*stage_pri*/,
|
||||
cpu->resPool->getResIdx(DCache),
|
||||
CacheUnit::CompleteSecondSplitRead,
|
||||
1)
|
||||
);
|
||||
inst->splitInstSked = true;
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n",
|
||||
inst->readTid(), inst->seqNum, addr, secondAddr);
|
||||
}
|
||||
|
||||
// Split Information for First Access
|
||||
// ==============================
|
||||
dataSize = secondAddr - addr;
|
||||
cache_req->splitAccess = true;
|
||||
|
||||
// Split Information for Second Access
|
||||
// ==============================
|
||||
inst->split2ndSize = addr + sizeof(T) - secondAddr;
|
||||
inst->split2ndAddr = secondAddr;
|
||||
inst->split2ndDataPtr = inst->splitMemData + dataSize;
|
||||
inst->split2ndFlags = flags;
|
||||
}
|
||||
|
||||
doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read);
|
||||
|
||||
if (cache_req->fault == NoFault) {
|
||||
cache_req->reqData = new uint8_t[acc_size];
|
||||
if (!cache_req->splitAccess) {
|
||||
cache_req->reqData = new uint8_t[dataSize];
|
||||
doCacheAccess(inst, NULL);
|
||||
} else {
|
||||
if (!inst->split2ndAccess) {
|
||||
cache_req->reqData = inst->splitMemData;
|
||||
} else {
|
||||
cache_req->reqData = inst->split2ndDataPtr;
|
||||
}
|
||||
|
||||
doCacheAccess(inst, NULL, cache_req);
|
||||
}
|
||||
}
|
||||
|
||||
return cache_req->fault;
|
||||
|
@ -330,14 +525,91 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
|
|||
uint64_t *write_res)
|
||||
{
|
||||
CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
|
||||
assert(cache_req);
|
||||
assert(cache_req && "Can't Find Instruction for Write!");
|
||||
|
||||
int acc_size = sizeof(T);
|
||||
doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write);
|
||||
// The block size of our peer
|
||||
unsigned blockSize = this->cachePort->peerBlockSize();
|
||||
|
||||
//The size of the data we're trying to write.
|
||||
int dataSize = sizeof(T);
|
||||
|
||||
if (inst->split2ndAccess) {
|
||||
dataSize = inst->split2ndSize;
|
||||
cache_req->splitAccess = true;
|
||||
cache_req->split2ndAccess = true;
|
||||
|
||||
DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum,
|
||||
inst->getMemAddr(), inst->split2ndAddr);
|
||||
}
|
||||
|
||||
//The address of the second part of this access if it needs to be split
|
||||
//across a cache line boundary.
|
||||
Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
|
||||
|
||||
if (secondAddr > addr && !inst->split2ndAccess) {
|
||||
|
||||
DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum,
|
||||
addr, secondAddr);
|
||||
|
||||
// Save All "Total" Split Information
|
||||
// ==============================
|
||||
inst->splitInst = true;
|
||||
inst->splitTotalSize = dataSize;
|
||||
|
||||
if (!inst->splitInstSked) {
|
||||
// Schedule Split Write/Complete for Instruction
|
||||
// ==============================
|
||||
int stage_num = cache_req->getStageNum();
|
||||
|
||||
int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
|
||||
|
||||
inst->resSched.push(new ScheduleEntry(stage_num,
|
||||
stage_pri,
|
||||
cpu->resPool->getResIdx(DCache),
|
||||
CacheUnit::InitSecondSplitWrite,
|
||||
1)
|
||||
);
|
||||
|
||||
inst->resSched.push(new ScheduleEntry(stage_num + 1,
|
||||
1/*stage_pri*/,
|
||||
cpu->resPool->getResIdx(DCache),
|
||||
CacheUnit::CompleteSecondSplitWrite,
|
||||
1)
|
||||
);
|
||||
inst->splitInstSked = true;
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n",
|
||||
inst->readTid(), inst->seqNum, addr, secondAddr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Split Information for First Access
|
||||
// ==============================
|
||||
dataSize = secondAddr - addr;
|
||||
cache_req->splitAccess = true;
|
||||
|
||||
// Split Information for Second Access
|
||||
// ==============================
|
||||
inst->split2ndSize = addr + sizeof(T) - secondAddr;
|
||||
inst->split2ndAddr = secondAddr;
|
||||
inst->split2ndStoreDataPtr = &cache_req->inst->storeData;
|
||||
inst->split2ndStoreDataPtr += dataSize;
|
||||
inst->split2ndFlags = flags;
|
||||
inst->splitInstSked = true;
|
||||
}
|
||||
|
||||
doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write);
|
||||
|
||||
if (cache_req->fault == NoFault) {
|
||||
cache_req->reqData = new uint8_t[acc_size];
|
||||
if (!cache_req->splitAccess) {
|
||||
// Remove this line since storeData is saved in INST?
|
||||
cache_req->reqData = new uint8_t[dataSize];
|
||||
doCacheAccess(inst, write_res);
|
||||
} else {
|
||||
doCacheAccess(inst, write_res, cache_req);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return cache_req->fault;
|
||||
|
@ -347,8 +619,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
|
|||
void
|
||||
CacheUnit::execute(int slot_num)
|
||||
{
|
||||
if (cacheBlocked) {
|
||||
DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n");
|
||||
if (cachePortBlocked) {
|
||||
DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -359,6 +631,8 @@ CacheUnit::execute(int slot_num)
|
|||
#if TRACING_ON
|
||||
ThreadID tid = inst->readTid();
|
||||
int seq_num = inst->seqNum;
|
||||
std::string acc_type = "write";
|
||||
|
||||
#endif
|
||||
|
||||
cache_req->fault = NoFault;
|
||||
|
@ -390,10 +664,14 @@ CacheUnit::execute(int slot_num)
|
|||
}
|
||||
|
||||
case InitiateReadData:
|
||||
#if TRACING_ON
|
||||
acc_type = "read";
|
||||
#endif
|
||||
case InitiateWriteData:
|
||||
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%u]: Initiating data access to %s for addr. %08p\n",
|
||||
tid, name(), cache_req->inst->getMemAddr());
|
||||
"[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. %08p\n",
|
||||
tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr());
|
||||
|
||||
inst->setCurResSlot(slot_num);
|
||||
|
||||
|
@ -405,6 +683,26 @@ CacheUnit::execute(int slot_num)
|
|||
|
||||
break;
|
||||
|
||||
case InitSecondSplitRead:
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n",
|
||||
tid, inst->seqNum, name(), cache_req->inst->split2ndAddr);
|
||||
inst->split2ndAccess = true;
|
||||
assert(inst->split2ndAddr != 0);
|
||||
read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags);
|
||||
break;
|
||||
|
||||
case InitSecondSplitWrite:
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. %08p\n",
|
||||
tid, inst->seqNum, name(), cache_req->inst->getMemAddr());
|
||||
|
||||
inst->split2ndAccess = true;
|
||||
assert(inst->split2ndAddr != 0);
|
||||
write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL);
|
||||
break;
|
||||
|
||||
|
||||
case CompleteFetch:
|
||||
if (cache_req->isMemAccComplete()) {
|
||||
DPRINTF(InOrderCachePort,
|
||||
|
@ -415,7 +713,14 @@ CacheUnit::execute(int slot_num)
|
|||
DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n",
|
||||
tid, seq_num, inst->staticInst->disassemble(inst->PC));
|
||||
|
||||
removeAddrDependency(inst);
|
||||
|
||||
delete cache_req->dataPkt;
|
||||
|
||||
// Do not stall and switch threads for fetch... for now..
|
||||
// TODO: We need to detect cache misses for latencies > 1
|
||||
// cache_req->setMemStall(false);
|
||||
|
||||
cache_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort,
|
||||
|
@ -425,6 +730,7 @@ CacheUnit::execute(int slot_num)
|
|||
"STALL: [tid:%i]: Fetch miss from %08p\n",
|
||||
tid, cache_req->inst->readPC());
|
||||
cache_req->setCompleted(false);
|
||||
//cache_req->setMemStall(true);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -437,11 +743,52 @@ CacheUnit::execute(int slot_num)
|
|||
if (cache_req->isMemAccComplete() ||
|
||||
inst->isDataPrefetch() ||
|
||||
inst->isInstPrefetch()) {
|
||||
removeAddrDependency(inst);
|
||||
cache_req->setMemStall(false);
|
||||
cache_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
|
||||
tid, cache_req->inst->getMemAddr());
|
||||
cache_req->setCompleted(false);
|
||||
cache_req->setMemStall(true);
|
||||
}
|
||||
break;
|
||||
|
||||
case CompleteSecondSplitRead:
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
if (cache_req->isMemAccComplete() ||
|
||||
inst->isDataPrefetch() ||
|
||||
inst->isInstPrefetch()) {
|
||||
removeAddrDependency(inst);
|
||||
cache_req->setMemStall(false);
|
||||
cache_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
|
||||
tid, cache_req->inst->split2ndAddr);
|
||||
cache_req->setCompleted(false);
|
||||
cache_req->setMemStall(true);
|
||||
}
|
||||
break;
|
||||
|
||||
case CompleteSecondSplitWrite:
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
if (cache_req->isMemAccComplete() ||
|
||||
inst->isDataPrefetch() ||
|
||||
inst->isInstPrefetch()) {
|
||||
removeAddrDependency(inst);
|
||||
cache_req->setMemStall(false);
|
||||
cache_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
|
||||
tid, cache_req->inst->split2ndAddr);
|
||||
cache_req->setCompleted(false);
|
||||
cache_req->setMemStall(true);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -462,8 +809,7 @@ CacheUnit::prefetch(DynInstPtr inst)
|
|||
// Clean-Up cache resource request so
|
||||
// other memory insts. can use them
|
||||
cache_req->setCompleted();
|
||||
cacheStatus = cacheAccessComplete;
|
||||
cacheBlocked = false;
|
||||
cachePortBlocked = false;
|
||||
cache_req->setMemAccPending(false);
|
||||
cache_req->setMemAccCompleted();
|
||||
inst->unsetMemAddr();
|
||||
|
@ -482,8 +828,7 @@ CacheUnit::writeHint(DynInstPtr inst)
|
|||
// Clean-Up cache resource request so
|
||||
// other memory insts. can use them
|
||||
cache_req->setCompleted();
|
||||
cacheStatus = cacheAccessComplete;
|
||||
cacheBlocked = false;
|
||||
cachePortBlocked = false;
|
||||
cache_req->setMemAccPending(false);
|
||||
cache_req->setMemAccCompleted();
|
||||
inst->unsetMemAddr();
|
||||
|
@ -491,15 +836,21 @@ CacheUnit::writeHint(DynInstPtr inst)
|
|||
|
||||
// @TODO: Split into doCacheRead() and doCacheWrite()
|
||||
Fault
|
||||
CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
|
||||
CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req)
|
||||
{
|
||||
Fault fault = NoFault;
|
||||
#if TRACING_ON
|
||||
ThreadID tid = inst->readTid();
|
||||
#endif
|
||||
|
||||
CacheReqPtr cache_req
|
||||
= dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
|
||||
CacheReqPtr cache_req;
|
||||
|
||||
if (split_req == NULL) {
|
||||
cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
|
||||
} else{
|
||||
cache_req = split_req;
|
||||
}
|
||||
|
||||
assert(cache_req);
|
||||
|
||||
// Check for LL/SC and if so change command
|
||||
|
@ -510,16 +861,21 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
|
|||
if (cache_req->pktCmd == MemCmd::WriteReq) {
|
||||
cache_req->pktCmd =
|
||||
cache_req->memReq->isSwap() ? MemCmd::SwapReq :
|
||||
(cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
|
||||
(cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq
|
||||
: MemCmd::WriteReq);
|
||||
}
|
||||
|
||||
cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd,
|
||||
Packet::Broadcast);
|
||||
Packet::Broadcast, cache_req->instIdx);
|
||||
|
||||
if (cache_req->dataPkt->isRead()) {
|
||||
cache_req->dataPkt->dataStatic(cache_req->reqData);
|
||||
} else if (cache_req->dataPkt->isWrite()) {
|
||||
if (inst->split2ndAccess) {
|
||||
cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr);
|
||||
} else {
|
||||
cache_req->dataPkt->dataStatic(&cache_req->inst->storeData);
|
||||
}
|
||||
|
||||
if (cache_req->memReq->isCondSwap()) {
|
||||
assert(write_res);
|
||||
|
@ -527,8 +883,6 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
|
|||
}
|
||||
}
|
||||
|
||||
cache_req->dataPkt->time = curTick;
|
||||
|
||||
bool do_access = true; // flag to suppress cache access
|
||||
|
||||
Request *memReq = cache_req->dataPkt->req;
|
||||
|
@ -546,28 +900,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
|
|||
if (do_access) {
|
||||
if (!cachePort->sendTiming(cache_req->dataPkt)) {
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i] [sn:%i] is waiting to retry request\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
retrySlot = cache_req->getSlot();
|
||||
retryReq = cache_req;
|
||||
retryPkt = cache_req->dataPkt;
|
||||
|
||||
cacheStatus = cacheWaitRetry;
|
||||
|
||||
//cacheBlocked = true;
|
||||
|
||||
DPRINTF(InOrderStall, "STALL: \n");
|
||||
|
||||
"[tid:%i] [sn:%i] cannot access cache, because port "
|
||||
"is blocked. now waiting to retry request\n", tid,
|
||||
inst->seqNum);
|
||||
cache_req->setCompleted(false);
|
||||
cachePortBlocked = true;
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i] [sn:%i] is now waiting for cache response\n",
|
||||
tid, inst->seqNum);
|
||||
cache_req->setCompleted();
|
||||
cache_req->setMemAccPending();
|
||||
cacheStatus = cacheWaitResponse;
|
||||
cacheBlocked = false;
|
||||
cachePortBlocked = false;
|
||||
}
|
||||
} else if (!do_access && memReq->isLLSC()){
|
||||
// Store-Conditional instructions complete even if they "failed"
|
||||
|
@ -594,6 +938,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
{
|
||||
// Cast to correct packet type
|
||||
CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt);
|
||||
|
||||
assert(cache_pkt);
|
||||
|
||||
if (cache_pkt->cacheReq->isSquashed()) {
|
||||
|
@ -601,9 +946,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
"Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
|
||||
cache_pkt->cacheReq->getInst()->readTid(),
|
||||
cache_pkt->cacheReq->getInst()->seqNum);
|
||||
DPRINTF(RefCount,
|
||||
"Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
|
||||
cache_pkt->cacheReq->getTid(),
|
||||
cache_pkt->cacheReq->seqNum);
|
||||
|
||||
cache_pkt->cacheReq->done();
|
||||
delete cache_pkt;
|
||||
|
||||
cpu->wakeCPU();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -615,7 +967,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
|
||||
// Cast to correct request type
|
||||
CacheRequest *cache_req = dynamic_cast<CacheReqPtr>(
|
||||
findRequest(cache_pkt->cacheReq->getInst()));
|
||||
findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx));
|
||||
|
||||
if (!cache_req) {
|
||||
warn(
|
||||
"[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. %08p\n",
|
||||
cache_pkt->cacheReq->getInst()->readTid(),
|
||||
cache_pkt->cacheReq->getInst()->seqNum,
|
||||
cache_pkt->cacheReq->getInst()->getMemAddr());
|
||||
}
|
||||
|
||||
assert(cache_req);
|
||||
|
||||
|
||||
|
@ -641,7 +1002,8 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
ExtMachInst ext_inst;
|
||||
StaticInstPtr staticInst = NULL;
|
||||
Addr inst_pc = inst->readPC();
|
||||
MachInst mach_inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
MachInst mach_inst =
|
||||
TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
|
||||
(cache_pkt->getPtr<uint8_t>()));
|
||||
|
||||
predecoder.setTC(cpu->thread[tid]->getTC());
|
||||
|
@ -661,7 +1023,31 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
"[tid:%u]: [sn:%i]: Processing cache access\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
if (inst->splitInst) {
|
||||
inst->splitFinishCnt++;
|
||||
|
||||
if (inst->splitFinishCnt == 2) {
|
||||
cache_req->memReq->setVirt(0/*inst->tid*/,
|
||||
inst->getMemAddr(),
|
||||
inst->splitTotalSize,
|
||||
0,
|
||||
0);
|
||||
|
||||
Packet split_pkt(cache_req->memReq, cache_req->pktCmd,
|
||||
Packet::Broadcast);
|
||||
|
||||
|
||||
if (inst->isLoad()) {
|
||||
split_pkt.dataStatic(inst->splitMemData);
|
||||
} else {
|
||||
split_pkt.dataStatic(&inst->storeData);
|
||||
}
|
||||
|
||||
inst->completeAcc(&split_pkt);
|
||||
}
|
||||
} else {
|
||||
inst->completeAcc(pkt);
|
||||
}
|
||||
|
||||
if (inst->isLoad()) {
|
||||
assert(cache_pkt->isRead());
|
||||
|
@ -696,6 +1082,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
cache_req->setMemAccPending(false);
|
||||
cache_req->setMemAccCompleted();
|
||||
|
||||
if (cache_req->isMemStall() &&
|
||||
cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
|
||||
DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid);
|
||||
|
||||
cpu->activateContext(tid);
|
||||
|
||||
DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
|
||||
"miss.\n", tid);
|
||||
}
|
||||
|
||||
// Wake up the CPU (if it went to sleep and was waiting on this
|
||||
// completion event).
|
||||
cpu->wakeCPU();
|
||||
|
@ -717,22 +1113,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
|
|||
void
|
||||
CacheUnit::recvRetry()
|
||||
{
|
||||
DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n",
|
||||
retryReq->inst->readTid(), retryReq->inst->seqNum);
|
||||
DPRINTF(InOrderCachePort, "Unblocking Cache Port. \n");
|
||||
|
||||
assert(retryPkt != NULL);
|
||||
assert(cacheBlocked);
|
||||
assert(cacheStatus == cacheWaitRetry);
|
||||
assert(cachePortBlocked);
|
||||
|
||||
if (cachePort->sendTiming(retryPkt)) {
|
||||
cacheStatus = cacheWaitResponse;
|
||||
retryPkt = NULL;
|
||||
cacheBlocked = false;
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort,
|
||||
"Retry Request for [tid:%i] [sn:%i] failed\n",
|
||||
retryReq->inst->readTid(), retryReq->inst->seqNum);
|
||||
}
|
||||
// Clear the cache port for use again
|
||||
cachePortBlocked = false;
|
||||
|
||||
cpu->wakeCPU();
|
||||
}
|
||||
|
||||
CacheUnitEvent::CacheUnitEvent()
|
||||
|
@ -755,7 +1143,8 @@ CacheUnitEvent::process()
|
|||
|
||||
tlb_res->tlbBlocked[tid] = false;
|
||||
|
||||
tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid);
|
||||
tlb_res->cpu->pipelineStage[stage_num]->
|
||||
unsetResStall(tlb_res->reqMap[slotIdx], tid);
|
||||
|
||||
req_ptr->tlbStall = false;
|
||||
|
||||
|
@ -764,6 +1153,26 @@ CacheUnitEvent::process()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid)
|
||||
{
|
||||
// If squashing due to memory stall, then we do NOT want to
|
||||
// squash the instruction that caused the stall so we
|
||||
// increment the sequence number here to prevent that.
|
||||
//
|
||||
// NOTE: This is only for the SwitchOnCacheMiss Model
|
||||
// NOTE: If you have multiple outstanding misses from the same
|
||||
// thread then you need to reevaluate this code
|
||||
// NOTE: squash should originate from
|
||||
// pipeline_stage.cc:processInstSchedule
|
||||
DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n",
|
||||
squash_seq_num + 1);
|
||||
|
||||
squash(inst, stage_num, squash_seq_num + 1, tid);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
CacheUnit::squash(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid)
|
||||
|
@ -784,6 +1193,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
|
|||
"[tid:%i] Squashing request from [sn:%i]\n",
|
||||
req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
|
||||
|
||||
if (req_ptr->isSquashed()) {
|
||||
DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n",
|
||||
req_ptr->getInst()->readTid(),
|
||||
req_ptr->getInst()->seqNum);
|
||||
map_it++;
|
||||
continue;
|
||||
}
|
||||
|
||||
req_ptr->setSquashed();
|
||||
|
||||
req_ptr->getInst()->setSquashed();
|
||||
|
@ -798,7 +1215,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
|
|||
|
||||
int stall_stage = reqMap[req_slot_num]->getStageNum();
|
||||
|
||||
cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid);
|
||||
cpu->pipelineStage[stall_stage]->
|
||||
unsetResStall(reqMap[req_slot_num], tid);
|
||||
}
|
||||
|
||||
if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
|
||||
|
@ -807,7 +1225,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
|
|||
|
||||
// Mark slot for removal from resource
|
||||
slot_remove_list.push_back(req_ptr->getSlot());
|
||||
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i] Squashing request from [sn:%i]\n",
|
||||
req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
|
||||
} else {
|
||||
DPRINTF(InOrderCachePort,
|
||||
"[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n",
|
||||
req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
|
||||
DPRINTF(RefCount,
|
||||
"[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n",
|
||||
req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum,
|
||||
req_ptr->getInst()->splitInst);
|
||||
}
|
||||
|
||||
if (req_ptr->getInst()->validMemAddr()) {
|
||||
DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n",
|
||||
req_ptr->getInst()->readTid(),
|
||||
req_ptr->getInst()->seqNum,
|
||||
req_ptr->getInst()->getMemAddr());
|
||||
|
||||
removeAddrDependency(req_ptr->getInst());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
map_it++;
|
||||
|
@ -927,14 +1367,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr,
|
|||
|
||||
template<>
|
||||
Fault
|
||||
CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res)
|
||||
CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags,
|
||||
uint64_t *res)
|
||||
{
|
||||
return write(inst, *(uint64_t*)&data, addr, flags, res);
|
||||
}
|
||||
|
||||
template<>
|
||||
Fault
|
||||
CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res)
|
||||
CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags,
|
||||
uint64_t *res)
|
||||
{
|
||||
return write(inst, *(uint32_t*)&data, addr, flags, res);
|
||||
}
|
||||
|
@ -942,7 +1384,9 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_
|
|||
|
||||
template<>
|
||||
Fault
|
||||
CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res)
|
||||
CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags,
|
||||
uint64_t *res)
|
||||
{
|
||||
return write(inst, (uint32_t)data, addr, flags, res);
|
||||
}
|
||||
|
||||
|
|
|
@ -62,7 +62,6 @@ class CacheUnit : public Resource
|
|||
public:
|
||||
CacheUnit(std::string res_name, int res_id, int res_width,
|
||||
int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
|
||||
virtual ~CacheUnit() {}
|
||||
|
||||
enum Command {
|
||||
InitiateFetch,
|
||||
|
@ -73,7 +72,11 @@ class CacheUnit : public Resource
|
|||
CompleteWriteData,
|
||||
Fetch,
|
||||
ReadData,
|
||||
WriteData
|
||||
WriteData,
|
||||
InitSecondSplitRead,
|
||||
InitSecondSplitWrite,
|
||||
CompleteSecondSplitRead,
|
||||
CompleteSecondSplitWrite
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -119,24 +122,19 @@ class CacheUnit : public Resource
|
|||
virtual void recvRetry();
|
||||
};
|
||||
|
||||
enum CachePortStatus {
|
||||
cacheWaitResponse,
|
||||
cacheWaitRetry,
|
||||
cacheAccessComplete
|
||||
};
|
||||
|
||||
void init();
|
||||
|
||||
virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
|
||||
int res_idx, int slot_num,
|
||||
unsigned cmd);
|
||||
|
||||
ResReqPtr findRequest(DynInstPtr inst);
|
||||
ResReqPtr findSplitRequest(DynInstPtr inst, int idx);
|
||||
|
||||
void requestAgain(DynInstPtr inst, bool &try_request);
|
||||
|
||||
int getSlot(DynInstPtr inst);
|
||||
|
||||
void freeSlot(int slot_num);
|
||||
|
||||
/** Execute the function of this resource. The Default is action
|
||||
* is to do nothing. More specific models will derive from this
|
||||
* class and define their own execute function.
|
||||
|
@ -146,6 +144,9 @@ class CacheUnit : public Resource
|
|||
void squash(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
void squashDueToMemStall(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
/** Processes cache completion event. */
|
||||
void processCacheCompletion(PacketPtr pkt);
|
||||
|
||||
|
@ -173,7 +174,7 @@ class CacheUnit : public Resource
|
|||
/** Read/Write on behalf of an instruction.
|
||||
* curResSlot needs to be a valid value in instruction.
|
||||
*/
|
||||
Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL);
|
||||
Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL);
|
||||
|
||||
void prefetch(DynInstPtr inst);
|
||||
|
||||
|
@ -181,23 +182,18 @@ class CacheUnit : public Resource
|
|||
|
||||
uint64_t getMemData(Packet *packet);
|
||||
|
||||
void setAddrDependency(DynInstPtr inst);
|
||||
void removeAddrDependency(DynInstPtr inst);
|
||||
|
||||
protected:
|
||||
/** Cache interface. */
|
||||
CachePort *cachePort;
|
||||
|
||||
CachePortStatus cacheStatus;
|
||||
bool cachePortBlocked;
|
||||
|
||||
CacheReqPtr retryReq;
|
||||
std::vector<Addr> addrList[ThePipeline::MaxThreads];
|
||||
|
||||
PacketPtr retryPkt;
|
||||
|
||||
int retrySlot;
|
||||
|
||||
bool cacheBlocked;
|
||||
|
||||
std::vector<Addr> addrList;
|
||||
|
||||
std::map<Addr, InstSeqNum> addrMap;
|
||||
std::map<Addr, InstSeqNum> addrMap[ThePipeline::MaxThreads];
|
||||
|
||||
public:
|
||||
int cacheBlkSize;
|
||||
|
@ -249,17 +245,18 @@ class CacheRequest : public ResourceRequest
|
|||
public:
|
||||
CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx,
|
||||
int slot_num, unsigned cmd, int req_size,
|
||||
MemCmd::Command pkt_cmd, unsigned flags, int cpu_id)
|
||||
MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx)
|
||||
: ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd),
|
||||
pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
|
||||
retryPkt(NULL), memAccComplete(false), memAccPending(false),
|
||||
tlbStall(false)
|
||||
tlbStall(false), splitAccess(false), splitAccessNum(-1),
|
||||
split2ndAccess(false), instIdx(idx)
|
||||
{ }
|
||||
|
||||
|
||||
virtual ~CacheRequest()
|
||||
{
|
||||
if (reqData) {
|
||||
if (reqData && !splitAccess) {
|
||||
delete [] reqData;
|
||||
}
|
||||
}
|
||||
|
@ -273,6 +270,11 @@ class CacheRequest : public ResourceRequest
|
|||
memAccComplete = completed;
|
||||
}
|
||||
|
||||
bool is2ndSplit()
|
||||
{
|
||||
return split2ndAccess;
|
||||
}
|
||||
|
||||
bool isMemAccComplete() { return memAccComplete; }
|
||||
|
||||
void setMemAccPending(bool pending = true) { memAccPending = pending; }
|
||||
|
@ -288,19 +290,27 @@ class CacheRequest : public ResourceRequest
|
|||
bool memAccComplete;
|
||||
bool memAccPending;
|
||||
bool tlbStall;
|
||||
|
||||
bool splitAccess;
|
||||
int splitAccessNum;
|
||||
bool split2ndAccess;
|
||||
int instIdx;
|
||||
|
||||
};
|
||||
|
||||
class CacheReqPacket : public Packet
|
||||
{
|
||||
public:
|
||||
CacheReqPacket(CacheRequest *_req,
|
||||
Command _cmd, short _dest)
|
||||
: Packet(_req->memReq, _cmd, _dest), cacheReq(_req)
|
||||
Command _cmd, short _dest, int _idx = 0)
|
||||
: Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
CacheRequest *cacheReq;
|
||||
int instIdx;
|
||||
|
||||
};
|
||||
|
||||
#endif //__CPU_CACHE_UNIT_HH__
|
||||
|
|
|
@ -54,6 +54,17 @@ ExecutionUnit::regStats()
|
|||
.name(name() + ".predictedNotTakenIncorrect")
|
||||
.desc("Number of Branches Incorrectly Predicted As Not Taken).");
|
||||
|
||||
lastExecuteCycle = curTick;
|
||||
|
||||
cyclesExecuted
|
||||
.name(name() + ".cyclesExecuted")
|
||||
.desc("Number of Cycles Execution Unit was used.");
|
||||
|
||||
utilization
|
||||
.name(name() + ".utilization")
|
||||
.desc("Utilization of Execution Unit (cycles / totalCycles).");
|
||||
utilization = cyclesExecuted / cpu->numCycles;
|
||||
|
||||
Resource::regStats();
|
||||
}
|
||||
|
||||
|
@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num)
|
|||
{
|
||||
case ExecuteInst:
|
||||
{
|
||||
if (curTick != lastExecuteCycle) {
|
||||
lastExecuteCycle = curTick;
|
||||
cyclesExecuted++;
|
||||
}
|
||||
|
||||
|
||||
if (inst->isMemRef()) {
|
||||
panic("%s not configured to handle memory ops.\n", resName);
|
||||
} else if (inst->isControl()) {
|
||||
|
|
|
@ -52,7 +52,6 @@ class ExecutionUnit : public Resource {
|
|||
public:
|
||||
ExecutionUnit(std::string res_name, int res_id, int res_width,
|
||||
int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
|
||||
virtual ~ExecutionUnit() {}
|
||||
|
||||
public:
|
||||
virtual void regStats();
|
||||
|
@ -71,6 +70,11 @@ class ExecutionUnit : public Resource {
|
|||
/////////////////////////////////////////////////////////////////
|
||||
Stats::Scalar predictedTakenIncorrect;
|
||||
Stats::Scalar predictedNotTakenIncorrect;
|
||||
|
||||
Stats::Scalar cyclesExecuted;
|
||||
Tick lastExecuteCycle;
|
||||
|
||||
Stats::Formula utilization;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width,
|
|||
}
|
||||
}
|
||||
|
||||
/** Destructor: releases the resourceEvent array held by this unit. */
FetchSeqUnit::~FetchSeqUnit()
{
    // Array form of delete is used here.
    // NOTE(review): presumably resourceEvent was allocated with new[]
    // by this unit -- confirm against the allocation site.
    delete [] resourceEvent;
}
|
||||
|
||||
void
|
||||
FetchSeqUnit::init()
|
||||
{
|
||||
|
@ -336,3 +341,35 @@ FetchSeqUnit::deactivateThread(ThreadID tid)
|
|||
if (thread_it != cpu->fetchPriorityList.end())
|
||||
cpu->fetchPriorityList.erase(thread_it);
|
||||
}
|
||||
|
||||
void
|
||||
FetchSeqUnit::suspendThread(ThreadID tid)
|
||||
{
|
||||
deactivateThread(tid);
|
||||
}
|
||||
|
||||
void
|
||||
FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
|
||||
{
|
||||
pcValid[tid] = true;
|
||||
|
||||
if (cpu->thread[tid]->lastGradIsBranch) {
|
||||
/** This function assumes that the instruction causing the context
|
||||
* switch was right after the branch. Thus, if it's not, then
|
||||
* we are updating incorrectly here
|
||||
*/
|
||||
assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC());
|
||||
|
||||
PC[tid] = cpu->thread[tid]->lastBranchNextNPC;
|
||||
nextPC[tid] = PC[tid] + instSize;
|
||||
nextNPC[tid] = nextPC[tid] + instSize;
|
||||
} else {
|
||||
PC[tid] = inst->readNextPC();
|
||||
nextPC[tid] = inst->readNextNPC();
|
||||
nextNPC[tid] = inst->readNextNPC() + instSize;
|
||||
}
|
||||
|
||||
DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch."
|
||||
"Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid],
|
||||
nextPC[tid], nextNPC[tid]);
|
||||
}
|
||||
|
|
|
@ -54,12 +54,15 @@ class FetchSeqUnit : public Resource {
|
|||
public:
|
||||
FetchSeqUnit(std::string res_name, int res_id, int res_width,
|
||||
int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
|
||||
virtual ~FetchSeqUnit() {}
|
||||
virtual ~FetchSeqUnit();
|
||||
|
||||
virtual void init();
|
||||
virtual void activateThread(ThreadID tid);
|
||||
virtual void deactivateThread(ThreadID tid);
|
||||
virtual void suspendThread(ThreadID tid);
|
||||
virtual void execute(int slot_num);
|
||||
void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);
|
||||
|
||||
|
||||
/** Override default Resource squash sequence. This actually,
|
||||
* looks in the global communication buffer to get squash
|
||||
|
|
|
@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num)
|
|||
"[tid:%i] Graduating instruction [sn:%i].\n",
|
||||
tid, inst->seqNum);
|
||||
|
||||
DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/);
|
||||
|
||||
// Release Non-Speculative "Block" on instructions that could not execute
|
||||
// because there was a non-speculative inst. active.
|
||||
// @TODO: Fix this functionality. Probably too conservative.
|
||||
|
|
|
@ -63,8 +63,6 @@ class GraduationUnit : public Resource {
|
|||
bool *nonSpecInstActive[ThePipeline::MaxThreads];
|
||||
|
||||
InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads];
|
||||
|
||||
/** @todo: Add Resource Stats Here */
|
||||
};
|
||||
|
||||
#endif //__CPU_INORDER_GRAD_UNIT_HH__
|
||||
|
|
|
@ -52,7 +52,8 @@ InstBuffer::regStats()
|
|||
{
|
||||
instsBypassed
|
||||
.name(name() + ".instsBypassed")
|
||||
.desc("Number of Instructions Bypassed.");
|
||||
.desc("Number of Instructions Bypassed.")
|
||||
.prereq(instsBypassed);
|
||||
|
||||
Resource::regStats();
|
||||
}
|
||||
|
|
|
@ -57,7 +57,6 @@ class MultDivUnit : public Resource {
|
|||
public:
|
||||
MultDivUnit(std::string res_name, int res_id, int res_width,
|
||||
int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
|
||||
virtual ~MultDivUnit() {}
|
||||
|
||||
public:
|
||||
/** Override default Resource getSlot(). Will only getSlot if
|
||||
|
|
|
@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width,
|
|||
|
||||
}
|
||||
|
||||
void
|
||||
UseDefUnit::regStats()
|
||||
{
|
||||
uniqueRegsPerSwitch
|
||||
.name(name() + ".uniqueRegsPerSwitch")
|
||||
.desc("Number of Unique Registers Needed Per Context Switch")
|
||||
.prereq(uniqueRegsPerSwitch);
|
||||
|
||||
Resource::regStats();
|
||||
}
|
||||
|
||||
ResReqPtr
|
||||
UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
|
||||
int slot_num, unsigned cmd)
|
||||
|
@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst)
|
|||
map<int, ResReqPtr>::iterator map_end = reqMap.end();
|
||||
|
||||
while (map_it != map_end) {
|
||||
UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>((*map_it).second);
|
||||
UseDefRequest* ud_req =
|
||||
dynamic_cast<UseDefRequest*>((*map_it).second);
|
||||
assert(ud_req);
|
||||
|
||||
if (ud_req &&
|
||||
|
@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx)
|
|||
// in the pipeline then stall instructions here
|
||||
if (*nonSpecInstActive[tid] == true &&
|
||||
seq_num > *nonSpecSeqNum[tid]) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is "
|
||||
"non-speculative instruction [sn:%i] has not graduated.\n",
|
||||
tid, seq_num, *nonSpecSeqNum[tid]);
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because"
|
||||
"there is non-speculative instruction [sn:%i] has not "
|
||||
"graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]);
|
||||
return;
|
||||
} else if (inst->isNonSpeculative()) {
|
||||
*nonSpecInstActive[tid] = true;
|
||||
|
@ -122,90 +134,133 @@ UseDefUnit::execute(int slot_idx)
|
|||
{
|
||||
int reg_idx = inst->_srcRegIdx[ud_idx];
|
||||
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source "
|
||||
"register idx %i (reg #%i).\n",
|
||||
tid, ud_idx, reg_idx);
|
||||
|
||||
// Ask register dependency map if it is OK to read from Arch. Reg. File
|
||||
// Ask register dependency map if it is OK to read from Arch.
|
||||
// Reg. File
|
||||
if (regDepMap[tid]->canRead(reg_idx, inst)) {
|
||||
|
||||
uniqueRegMap[reg_idx] = true;
|
||||
|
||||
if (inst->seqNum <= outReadSeqNum[tid]) {
|
||||
if (reg_idx < FP_Base_DepTag) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n",
|
||||
tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid()));
|
||||
inst->setIntSrc(ud_idx,
|
||||
cpu->readIntReg(reg_idx,inst->readTid()));
|
||||
} else if (reg_idx < Ctrl_Base_DepTag) {
|
||||
reg_idx -= FP_Base_DepTag;
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i"
|
||||
"from Register File:%i.\n",
|
||||
tid,
|
||||
reg_idx,
|
||||
cpu->readFloatRegBits(reg_idx, inst->readTid()),
|
||||
cpu->readFloatReg(reg_idx, inst->readTid()));
|
||||
cpu->readIntReg(reg_idx,inst->readTid()));
|
||||
inst->setIntSrc(ud_idx,
|
||||
cpu->readIntReg(reg_idx,
|
||||
inst->readTid()));
|
||||
} else if (reg_idx < Ctrl_Base_DepTag) {
|
||||
reg_idx -= FP_Base_DepTag;
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i"
|
||||
"from Register File:%x (%08f).\n",
|
||||
tid,
|
||||
reg_idx,
|
||||
cpu->readFloatRegBits(reg_idx,
|
||||
inst->readTid()),
|
||||
cpu->readFloatReg(reg_idx,
|
||||
inst->readTid()));
|
||||
|
||||
inst->setFloatSrc(ud_idx,
|
||||
cpu->readFloatReg(reg_idx, inst->readTid()));
|
||||
cpu->readFloatReg(reg_idx,
|
||||
inst->readTid()));
|
||||
} else {
|
||||
reg_idx -= Ctrl_Base_DepTag;
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n",
|
||||
tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid()));
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i "
|
||||
"from Register File:%i.\n",
|
||||
tid,
|
||||
reg_idx,
|
||||
cpu->readMiscReg(reg_idx,
|
||||
inst->readTid()));
|
||||
inst->setIntSrc(ud_idx,
|
||||
cpu->readMiscReg(reg_idx, inst->readTid()));
|
||||
cpu->readMiscReg(reg_idx,
|
||||
inst->readTid()));
|
||||
}
|
||||
|
||||
outReadSeqNum[tid] = maxSeqNum;
|
||||
|
||||
ud_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's"
|
||||
" registers yet.\n", tid, outReadSeqNum[tid]);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because "
|
||||
"of [sn:%i] hasnt read it's registers yet.\n",
|
||||
tid, outReadSeqNum[tid]);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
|
||||
"[sn:%i] to write\n",
|
||||
tid, outReadSeqNum[tid]);
|
||||
ud_req->done(false);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Look for forwarding opportunities
|
||||
DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst);
|
||||
DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx,
|
||||
ud_idx,
|
||||
inst);
|
||||
|
||||
if (forward_inst) {
|
||||
|
||||
if (inst->seqNum <= outReadSeqNum[tid]) {
|
||||
int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx);
|
||||
int dest_reg_idx =
|
||||
forward_inst->getDestIdxNum(reg_idx);
|
||||
|
||||
if (reg_idx < FP_Base_DepTag) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
|
||||
" reg value 0x%x from "
|
||||
"[sn:%i] to [sn:%i] source #%i.\n",
|
||||
tid, forward_inst->readIntResult(dest_reg_idx) ,
|
||||
forward_inst->seqNum, inst->seqNum, ud_idx);
|
||||
inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx));
|
||||
tid,
|
||||
forward_inst->readIntResult(dest_reg_idx),
|
||||
forward_inst->seqNum,
|
||||
inst->seqNum, ud_idx);
|
||||
inst->setIntSrc(ud_idx,
|
||||
forward_inst->
|
||||
readIntResult(dest_reg_idx));
|
||||
} else if (reg_idx < Ctrl_Base_DepTag) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
|
||||
" reg value 0x%x from "
|
||||
"[sn:%i] to [sn:%i] source #%i.\n",
|
||||
tid, forward_inst->readFloatResult(dest_reg_idx) ,
|
||||
tid,
|
||||
forward_inst->readFloatResult(dest_reg_idx),
|
||||
forward_inst->seqNum, inst->seqNum, ud_idx);
|
||||
inst->setFloatSrc(ud_idx,
|
||||
forward_inst->readFloatResult(dest_reg_idx));
|
||||
forward_inst->
|
||||
readFloatResult(dest_reg_idx));
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
|
||||
" reg value 0x%x from "
|
||||
"[sn:%i] to [sn:%i] source #%i.\n",
|
||||
tid, forward_inst->readIntResult(dest_reg_idx) ,
|
||||
forward_inst->seqNum, inst->seqNum, ud_idx);
|
||||
inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx));
|
||||
tid,
|
||||
forward_inst->readIntResult(dest_reg_idx),
|
||||
forward_inst->seqNum,
|
||||
inst->seqNum, ud_idx);
|
||||
inst->setIntSrc(ud_idx,
|
||||
forward_inst->
|
||||
readIntResult(dest_reg_idx));
|
||||
}
|
||||
|
||||
outReadSeqNum[tid] = maxSeqNum;
|
||||
|
||||
ud_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's"
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read "
|
||||
"because of [sn:%i] hasnt read it's"
|
||||
" registers yet.\n", tid, outReadSeqNum[tid]);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n",
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
|
||||
"[sn:%i] to forward\n",
|
||||
tid, outReadSeqNum[tid]);
|
||||
ud_req->done(false);
|
||||
}
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i"
|
||||
"is not ready to read.\n",
|
||||
tid, reg_idx);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n",
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read "
|
||||
"register (idx=%i)\n",
|
||||
tid, reg_idx);
|
||||
outReadSeqNum[tid] = inst->seqNum;
|
||||
ud_req->done(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -216,12 +271,14 @@ UseDefUnit::execute(int slot_idx)
|
|||
int reg_idx = inst->_destRegIdx[ud_idx];
|
||||
|
||||
if (regDepMap[tid]->canWrite(reg_idx, inst)) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &"
|
||||
"Attempting to write to Register File.\n",
|
||||
tid, reg_idx);
|
||||
|
||||
uniqueRegMap[reg_idx] = true;
|
||||
if (inst->seqNum <= outReadSeqNum[tid]) {
|
||||
if (reg_idx < FP_Base_DepTag) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result "
|
||||
"0x%x to register idx %i.\n",
|
||||
tid, inst->readIntResult(ud_idx), reg_idx);
|
||||
|
||||
// Remove Dependencies
|
||||
|
@ -236,33 +293,54 @@ UseDefUnit::execute(int slot_idx)
|
|||
|
||||
reg_idx -= FP_Base_DepTag;
|
||||
|
||||
if (inst->resultType(ud_idx) == InOrderDynInst::Integer) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n",
|
||||
tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
|
||||
if (inst->resultType(ud_idx) ==
|
||||
InOrderDynInst::Integer) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits "
|
||||
"Result 0x%x (bits:0x%x) to register "
|
||||
"idx %i.\n",
|
||||
tid,
|
||||
inst->readFloatResult(ud_idx),
|
||||
inst->readIntResult(ud_idx),
|
||||
reg_idx);
|
||||
|
||||
cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here
|
||||
// Check for FloatRegBits Here
|
||||
cpu->setFloatRegBits(reg_idx,
|
||||
inst->readIntResult(ud_idx),
|
||||
inst->readTid());
|
||||
} else if (inst->resultType(ud_idx) == InOrderDynInst::Float) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n",
|
||||
tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
|
||||
} else if (inst->resultType(ud_idx) ==
|
||||
InOrderDynInst::Float) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float "
|
||||
"Result 0x%x (bits:0x%x) to register "
|
||||
"idx %i.\n",
|
||||
tid, inst->readFloatResult(ud_idx),
|
||||
inst->readIntResult(ud_idx),
|
||||
reg_idx);
|
||||
|
||||
cpu->setFloatReg(reg_idx,
|
||||
inst->readFloatResult(ud_idx),
|
||||
inst->readTid());
|
||||
} else if (inst->resultType(ud_idx) == InOrderDynInst::Double) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n",
|
||||
tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
|
||||
} else if (inst->resultType(ud_idx) ==
|
||||
InOrderDynInst::Double) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double "
|
||||
"Result 0x%x (bits:0x%x) to register "
|
||||
"idx %i.\n",
|
||||
tid,
|
||||
inst->readFloatResult(ud_idx),
|
||||
inst->readIntResult(ud_idx),
|
||||
reg_idx);
|
||||
|
||||
cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here
|
||||
// Check for FloatRegBits Here
|
||||
cpu->setFloatReg(reg_idx,
|
||||
inst->readFloatResult(ud_idx),
|
||||
inst->readTid());
|
||||
} else {
|
||||
panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName());
|
||||
panic("Result Type Not Set For [sn:%i] %s.\n",
|
||||
inst->seqNum, inst->instName());
|
||||
}
|
||||
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x "
|
||||
"to register idx %i.\n",
|
||||
tid, inst->readIntResult(ud_idx), reg_idx);
|
||||
|
||||
// Remove Dependencies
|
||||
|
@ -279,17 +357,23 @@ UseDefUnit::execute(int slot_idx)
|
|||
|
||||
ud_req->done();
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's"
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because "
|
||||
"of [sn:%i] hasnt read it's"
|
||||
" registers yet.\n", tid, outReadSeqNum);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n",
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
|
||||
"[sn:%i] to read\n",
|
||||
tid, outReadSeqNum);
|
||||
ud_req->done(false);
|
||||
}
|
||||
} else {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is "
|
||||
"not ready to write.\n",
|
||||
tid, reg_idx);
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n",
|
||||
DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write "
|
||||
"register (idx=%i)\n",
|
||||
tid, reg_idx);
|
||||
outWriteSeqNum[tid] = inst->seqNum;
|
||||
ud_req->done(false);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -323,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
|
|||
req_ptr->getInst()->readTid(),
|
||||
req_ptr->getInst()->seqNum);
|
||||
|
||||
regDepMap[tid]->remove(req_ptr->getInst());
|
||||
|
||||
int req_slot_num = req_ptr->getSlot();
|
||||
|
||||
if (latency > 0)
|
||||
if (latency > 0) {
|
||||
assert(0);
|
||||
|
||||
unscheduleEvent(req_slot_num);
|
||||
}
|
||||
|
||||
// Mark request for later removal
|
||||
cpu->reqRemoveList.push(req_ptr);
|
||||
|
||||
// Mark slot for removal from resource
|
||||
slot_remove_list.push_back(req_ptr->getSlot());
|
||||
|
@ -343,18 +431,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
|
|||
}
|
||||
|
||||
if (outReadSeqNum[tid] >= squash_seq_num) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid);
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n",
|
||||
tid);
|
||||
outReadSeqNum[tid] = maxSeqNum;
|
||||
} else if (outReadSeqNum[tid] != maxSeqNum) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read "
|
||||
"Seq Num %i\n",
|
||||
tid, outReadSeqNum[tid]);
|
||||
}
|
||||
|
||||
if (outWriteSeqNum[tid] >= squash_seq_num) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid);
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n",
|
||||
tid);
|
||||
outWriteSeqNum[tid] = maxSeqNum;
|
||||
} else if (outWriteSeqNum[tid] != maxSeqNum) {
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n",
|
||||
DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write "
|
||||
"Seq Num %i\n",
|
||||
tid, outWriteSeqNum[tid]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
|
||||
{
|
||||
uniqueRegsPerSwitch = uniqueRegMap.size();
|
||||
uniqueRegMap.clear();
|
||||
}
|
||||
|
|
|
@ -68,8 +68,12 @@ class UseDefUnit : public Resource {
|
|||
virtual void squash(DynInstPtr inst, int stage_num,
|
||||
InstSeqNum squash_seq_num, ThreadID tid);
|
||||
|
||||
void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);
|
||||
|
||||
const InstSeqNum maxSeqNum;
|
||||
|
||||
void regStats();
|
||||
|
||||
protected:
|
||||
RegDepMap *regDepMap[ThePipeline::MaxThreads];
|
||||
|
||||
|
@ -84,14 +88,18 @@ class UseDefUnit : public Resource {
|
|||
|
||||
InstSeqNum floatRegSize[ThePipeline::MaxThreads];
|
||||
|
||||
Stats::Average uniqueRegsPerSwitch;
|
||||
std::map<unsigned, bool> uniqueRegMap;
|
||||
|
||||
public:
|
||||
class UseDefRequest : public ResourceRequest {
|
||||
public:
|
||||
typedef ThePipeline::DynInstPtr DynInstPtr;
|
||||
|
||||
public:
|
||||
UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx,
|
||||
int slot_num, unsigned cmd, int use_def_idx)
|
||||
UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num,
|
||||
int res_idx, int slot_num, unsigned cmd,
|
||||
int use_def_idx)
|
||||
: ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd),
|
||||
useDefIdx(use_def_idx)
|
||||
{ }
|
||||
|
|
|
@ -242,21 +242,21 @@ InOrderThreadContext::setRegOtherThread(int misc_reg, const MiscReg &val,
|
|||
void
|
||||
InOrderThreadContext::setPC(uint64_t val)
|
||||
{
|
||||
DPRINTF(InOrderCPU, "Setting PC to %08p\n", val);
|
||||
DPRINTF(InOrderCPU, "[tid:%i] Setting PC to %08p\n", thread->readTid(), val);
|
||||
cpu->setPC(val, thread->readTid());
|
||||
}
|
||||
|
||||
void
|
||||
InOrderThreadContext::setNextPC(uint64_t val)
|
||||
{
|
||||
DPRINTF(InOrderCPU, "Setting NPC to %08p\n", val);
|
||||
DPRINTF(InOrderCPU, "[tid:%i] Setting NPC to %08p\n", thread->readTid(), val);
|
||||
cpu->setNextPC(val, thread->readTid());
|
||||
}
|
||||
|
||||
void
|
||||
InOrderThreadContext::setNextNPC(uint64_t val)
|
||||
{
|
||||
DPRINTF(InOrderCPU, "Setting NNPC to %08p\n", val);
|
||||
DPRINTF(InOrderCPU, "[tid:%i] Setting NNPC to %08p\n", thread->readTid(), val);
|
||||
cpu->setNextNPC(val, thread->readTid());
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,6 @@ class InOrderThreadContext : public ThreadContext
|
|||
/** Pointer to the thread state that this TC corresponds to. */
|
||||
InOrderThreadState *thread;
|
||||
|
||||
|
||||
/** Returns a pointer to the ITB. */
|
||||
/** @TODO: PERF: Should we bind this to a pointer in constructor? */
|
||||
TheISA::TLB *getITBPtr() { return cpu->getITBPtr(); }
|
||||
|
|
|
@ -79,14 +79,14 @@ class InOrderThreadState : public ThreadState {
|
|||
#if FULL_SYSTEM
|
||||
InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num)
|
||||
: ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num),
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false)
|
||||
{ }
|
||||
#else
|
||||
InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num,
|
||||
Process *_process)
|
||||
: ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num,
|
||||
_process),
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
|
@ -105,10 +105,15 @@ class InOrderThreadState : public ThreadState {
|
|||
/** Returns a pointer to the TC of this thread. */
|
||||
ThreadContext *getTC() { return tc; }
|
||||
|
||||
/** Return the thread id */
|
||||
int readTid() { return threadId(); }
|
||||
|
||||
/** Pointer to the last graduated instruction in the thread */
|
||||
//DynInstPtr lastGradInst;
|
||||
|
||||
/** Is last instruction graduated a branch? */
|
||||
bool lastGradIsBranch;
|
||||
Addr lastBranchPC;
|
||||
Addr lastBranchNextPC;
|
||||
Addr lastBranchNextNPC;
|
||||
};
|
||||
|
||||
#endif // __CPU_INORDER_THREAD_STATE_HH__
|
||||
|
|
|
@ -63,6 +63,7 @@ progress_interval=0
|
|||
stageTracing=false
|
||||
stageWidth=1
|
||||
system=system
|
||||
threadModel=SMT
|
||||
tracer=system.cpu.tracer
|
||||
workload=system.cpu.workload
|
||||
dcache_port=system.cpu.dcache.cpu_side
|
||||
|
@ -78,7 +79,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -113,7 +113,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -148,7 +147,6 @@ hash_delay=1
|
|||
latency=10000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=100000
|
||||
|
|
|
@ -5,10 +5,10 @@ The Regents of The University of Michigan
|
|||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Jul 4 2009 20:43:52
|
||||
M5 revision 20167772fb15 6281 default tip
|
||||
M5 started Jul 4 2009 20:43:52
|
||||
M5 executing on tater
|
||||
M5 compiled Jan 30 2010 14:58:44
|
||||
M5 revision 4b602939e245 6707 default inorder_vortex_alpha qtip tip
|
||||
M5 started Jan 30 2010 14:58:45
|
||||
M5 executing on zooks
|
||||
command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
|
|
|
@ -1,88 +1,87 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 66323 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 296324 # Number of bytes of host memory used
|
||||
host_seconds 1331.98 # Real time elapsed on the host
|
||||
host_tick_rate 81990812 # Simulator tick rate (ticks/s)
|
||||
host_inst_rate 51950 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 166756 # Number of bytes of host memory used
|
||||
host_seconds 1700.48 # Real time elapsed on the host
|
||||
host_tick_rate 63220517 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 88340673 # Number of instructions simulated
|
||||
sim_seconds 0.109210 # Number of seconds simulated
|
||||
sim_ticks 109210014500 # Number of ticks simulated
|
||||
sim_seconds 0.107505 # Number of seconds simulated
|
||||
sim_ticks 107505320500 # Number of ticks simulated
|
||||
system.cpu.AGEN-Unit.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 10443271 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 3311206 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 88340674 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 10466150 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 3314731 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 88523379 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.cyclesExecuted 53070972 # Number of Cycles Execution Unit was used.
|
||||
system.cpu.Execution-Unit.instReqsProcessed 53075554 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515835 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 1659774 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 184507615 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 4515839 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 1659770 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Execution-Unit.utilization 0.246830 # Utilization of Execution Unit (cycles / totalCycles).
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 186350086 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Graduation-Unit.instReqsProcessed 88340673 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed.
|
||||
system.cpu.Mult-Div-Unit.instReqsProcessed 82202 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.multInstReqsProcessed 41101 # Number of Multiply Requests Processed.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 158796488 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 165783241 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.activity 86.931340 # Percentage of cycles cpu is active
|
||||
system.cpu.committedInsts 88340673 # Number of Instructions Simulated (Per-Thread)
|
||||
system.cpu.committedInsts_total 88340673 # Number of Instructions Simulated (Total)
|
||||
system.cpu.cpi 2.472474 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 2.472474 # CPI: Total CPI of All Threads
|
||||
system.cpu.contextSwitches 1 # Number of context switches
|
||||
system.cpu.cpi 2.433881 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 2.433881 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 20276638 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 38181.240129 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 35069.166968 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 20215854 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 2320808500 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.002998 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 60784 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 2131013000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 38009.956226 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34917.034197 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 20215872 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 2309713000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.002997 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 60766 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 2121768500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.002997 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 60766 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 14613377 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56049.825426 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53049.825426 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56040.926479 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53040.926479 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 14463584 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 8395871500 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 8394538500 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.010250 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 149793 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 7946492500 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 7945159500 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.010250 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 149793 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 169.741509 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 169.741568 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 34890015 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 50891.977756 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 34679438 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 10716680000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_avg_miss_latency 50837.302134 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 34679456 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 10704251500 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.006035 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 210577 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 18 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 10077505500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_misses 210559 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 10066928000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.006035 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 210559 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.overall_accesses 34890015 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 50891.977756 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 47860.720748 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_miss_latency 50837.302134 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 47810.485422 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 34679438 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 10716680000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_hits 34679456 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 10704251500 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.006035 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 210577 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 18 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 10077505500 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_misses 210559 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 10066928000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.006035 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 210559 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -90,9 +89,9 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.dcache.replacements 200248 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 204344 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 4077.182458 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 34685659 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 848449000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.tagsinuse 4076.864414 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 34685671 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 848885000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 147714 # number of writebacks
|
||||
system.cpu.dcache_port.instReqsProcessed 35224018 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.dtb.data_accesses 34987415 # DTB accesses
|
||||
|
@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 14620629 # DT
|
|||
system.cpu.dtb.write_acv 0 # DTB write access violations
|
||||
system.cpu.dtb.write_hits 14613377 # DTB write hits
|
||||
system.cpu.dtb.write_misses 7252 # DTB write misses
|
||||
system.cpu.icache.ReadReq_accesses 96166938 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 19084.949617 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 15849.033723 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 96087744 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 1511413500 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_accesses 97826463 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 19024.038820 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 15840.795350 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 97745885 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 1532919000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000824 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 79194 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 1266 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 1235083500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000810 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 80578 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 2650 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 1234441500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000797 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 77928 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 1233.032338 # Average number of references to valid blocks.
|
||||
system.cpu.icache.avg_refs 1254.310197 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 96166938 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 19084.949617 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 96087744 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 1511413500 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_accesses 97826463 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 19024.038820 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 97745885 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 1532919000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.000824 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 79194 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 1266 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 1235083500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000810 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 80578 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 2650 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 1234441500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000797 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 77928 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.overall_accesses 96166938 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 19084.949617 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 15849.033723 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_accesses 97826463 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 19024.038820 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 15840.795350 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 96087744 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 1511413500 # number of overall miss cycles
|
||||
system.cpu.icache.overall_hits 97745885 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 1532919000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.000824 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 79194 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 1266 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 1235083500 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000810 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 80578 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 2650 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 1234441500 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000797 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 77928 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.replacements 75882 # number of replacements
|
||||
system.cpu.icache.sampled_refs 77928 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 1874.320715 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 96087744 # Total number of references to valid blocks.
|
||||
system.cpu.icache.tagsinuse 1873.747475 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 97745885 # Total number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.icache_port.instReqsProcessed 96166940 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.ipc 0.404453 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.404453 # IPC: Total IPC of All Threads
|
||||
system.cpu.icache_port.instReqsProcessed 97826706 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.idleCycles 28099010 # Number of cycles cpu's stages were not processed
|
||||
system.cpu.ipc 0.410867 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.410867 # IPC: Total IPC of All Threads
|
||||
system.cpu.itb.data_accesses 0 # DTB accesses
|
||||
system.cpu.itb.data_acv 0 # DTB access violations
|
||||
system.cpu.itb.data_hits 0 # DTB hits
|
||||
system.cpu.itb.data_misses 0 # DTB misses
|
||||
system.cpu.itb.fetch_accesses 96170872 # ITB accesses
|
||||
system.cpu.itb.fetch_accesses 97830397 # ITB accesses
|
||||
system.cpu.itb.fetch_acv 0 # ITB acv
|
||||
system.cpu.itb.fetch_hits 96166938 # ITB hits
|
||||
system.cpu.itb.fetch_hits 97826463 # ITB hits
|
||||
system.cpu.itb.fetch_misses 3934 # ITB misses
|
||||
system.cpu.itb.read_accesses 0 # DTB read accesses
|
||||
system.cpu.itb.read_acv 0 # DTB read access violations
|
||||
|
@ -185,31 +185,31 @@ system.cpu.itb.write_acv 0 # DT
|
|||
system.cpu.itb.write_hits 0 # DTB write hits
|
||||
system.cpu.itb.write_misses 0 # DTB write misses
|
||||
system.cpu.l2cache.ReadExReq_accesses 143578 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52038.849963 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.083578 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 7471634000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52034.768558 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.222875 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 7471048000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 143578 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743132000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 5743152000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 143578 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 138694 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52316.057051 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40003.485162 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52087.681159 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40004.623879 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 95224 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 2274179000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_latency 2264251500 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.313424 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 43470 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1738951500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1739001000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.313424 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 43470 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.UpgradeReq_accesses 6215 # number of UpgradeReq accesses(hits+misses)
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 51993.805310 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000.884956 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 323141500 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 51862.831858 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40002.815768 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 322327500 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_misses 6215 # number of UpgradeReq misses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248605500 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 248617500 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_misses 6215 # number of UpgradeReq MSHR misses
|
||||
system.cpu.l2cache.Writeback_accesses 147714 # number of Writeback accesses(hits+misses)
|
||||
|
@ -223,29 +223,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 #
|
|||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 282272 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52103.272957 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52047.065459 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 95224 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 9745813000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 9735299500 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.662652 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 187048 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 7482083500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 7482153000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.662652 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 187048 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.overall_accesses 282272 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52103.272957 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40000.874107 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52047.065459 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40001.245670 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 95224 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 9745813000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 9735299500 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.662652 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 187048 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 7482083500 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 7482153000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.662652 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 187048 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -253,16 +253,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.l2cache.replacements 147733 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 172939 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 18262.944082 # Cycle average of tags in use
|
||||
system.cpu.l2cache.tagsinuse 18257.402494 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 110306 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 120636 # number of writebacks
|
||||
system.cpu.numCycles 218420030 # number of cpu cycles simulated
|
||||
system.cpu.numCycles 215010642 # number of cpu cycles simulated
|
||||
system.cpu.runCycles 186911632 # Number of cycles cpu stages are processed.
|
||||
system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode
|
||||
system.cpu.smt_cpi no_value # CPI: Total SMT-CPI
|
||||
system.cpu.smt_ipc no_value # IPC: Total SMT-IPC
|
||||
system.cpu.threadCycles 218420030 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.stage-0.idleCycles 117180245 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-0.runCycles 97830397 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-0.utilization 45.500258 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-1.idleCycles 126487263 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-1.runCycles 88523379 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-1.utilization 41.171627 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-2.idleCycles 125185318 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-2.runCycles 89825324 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-2.utilization 41.777153 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-3.idleCycles 179779372 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-3.runCycles 35231270 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-3.utilization 16.385826 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-4.idleCycles 126669969 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-4.runCycles 88340673 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-4.utilization 41.086651 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.threadCycles 215010642 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.workload.PROG:num_syscalls 4583 # Number of system calls
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
|
@ -63,6 +63,7 @@ progress_interval=0
|
|||
stageTracing=false
|
||||
stageWidth=1
|
||||
system=system
|
||||
threadModel=SMT
|
||||
tracer=system.cpu.tracer
|
||||
workload=system.cpu.workload
|
||||
dcache_port=system.cpu.dcache.cpu_side
|
||||
|
@ -78,7 +79,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -113,7 +113,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -148,7 +147,6 @@ hash_delay=1
|
|||
latency=10000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=100000
|
||||
|
|
|
@ -5,10 +5,10 @@ The Regents of The University of Michigan
|
|||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Jul 4 2009 20:43:52
|
||||
M5 revision 20167772fb15 6281 default tip
|
||||
M5 started Jul 4 2009 20:43:52
|
||||
M5 executing on tater
|
||||
M5 compiled Jan 29 2010 09:29:58
|
||||
M5 revision a196f8cf520a 6706 default qtip tip inorder_twolf_alpha
|
||||
M5 started Jan 29 2010 09:31:14
|
||||
M5 executing on zooks
|
||||
command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing
|
||||
Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sav
|
||||
Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sv2
|
||||
|
|
|
@ -1,88 +1,87 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 69440 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 210892 # Number of bytes of host memory used
|
||||
host_seconds 1323.48 # Real time elapsed on the host
|
||||
host_tick_rate 76516395 # Simulator tick rate (ticks/s)
|
||||
host_inst_rate 55182 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 156168 # Number of bytes of host memory used
|
||||
host_seconds 1665.47 # Real time elapsed on the host
|
||||
host_tick_rate 59164617 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 91903056 # Number of instructions simulated
|
||||
sim_seconds 0.101268 # Number of seconds simulated
|
||||
sim_ticks 101268061000 # Number of ticks simulated
|
||||
sim_seconds 0.098537 # Number of seconds simulated
|
||||
sim_ticks 98536744000 # Number of ticks simulated
|
||||
system.cpu.AGEN-Unit.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 8198984 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 2041701 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 91903057 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 8232810 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 2041716 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 92657148 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.cyclesExecuted 64907308 # Number of Cycles Execution Unit was used.
|
||||
system.cpu.Execution-Unit.instReqsProcessed 64907696 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 3739118 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 1029596 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 189586934 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.utilization 0.329356 # Utilization of Execution Unit (cycles / totalCycles).
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 191370621 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Graduation-Unit.instReqsProcessed 91903056 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed.
|
||||
system.cpu.Mult-Div-Unit.instReqsProcessed 916504 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.multInstReqsProcessed 458252 # Number of Multiply Requests Processed.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 188816950 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 196152134 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.activity 96.743392 # Percentage of cycles cpu is active
|
||||
system.cpu.committedInsts 91903056 # Number of Instructions Simulated (Per-Thread)
|
||||
system.cpu.committedInsts_total 91903056 # Number of Instructions Simulated (Total)
|
||||
system.cpu.cpi 2.203802 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 2.203802 # CPI: Total CPI of All Threads
|
||||
system.cpu.contextSwitches 1 # Number of context switches
|
||||
system.cpu.cpi 2.144363 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 2.144363 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 19996198 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 51623.700624 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48550.526316 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 19995717 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 24831000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 51569.473684 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48547.368421 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 19995723 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 24495500 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.000024 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 481 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_hits 6 # number of ReadReq MSHR hits
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 23061500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_misses 475 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 23060000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000024 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 475 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 6501103 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56415.277031 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53415.277031 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56295.857988 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53295.857988 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 6499244 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 104876000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 104654000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.000286 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 1859 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 99299000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 99077000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.000286 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 1859 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 11918.612686 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 11918.613585 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 26497301 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 55430.341880 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 26494961 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 129707000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_avg_miss_latency 55333.976007 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 26494967 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 129149500 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.000088 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 2340 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 6 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 122360500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_misses 2334 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 122137000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 2334 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.overall_accesses 26497301 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 55430.341880 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 52425.235647 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_miss_latency 55333.976007 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 52329.477292 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 26494961 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 129707000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_hits 26494967 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 129149500 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.000088 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 2340 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 6 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 122360500 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_misses 2334 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 122137000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 2334 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -90,8 +89,8 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.dcache.replacements 157 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 2223 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 1441.819572 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 26495076 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.tagsinuse 1441.684134 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 26495078 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 104 # number of writebacks
|
||||
system.cpu.dcache_port.instReqsProcessed 26537108 # Number of Instructions Requests that completed in this resource.
|
||||
|
@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses 6501126 # DT
|
|||
system.cpu.dtb.write_acv 0 # DTB write access violations
|
||||
system.cpu.dtb.write_hits 6501103 # DTB write hits
|
||||
system.cpu.dtb.write_misses 23 # DTB write misses
|
||||
system.cpu.icache.ReadReq_accesses 97683877 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 27282.787360 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 24026.266636 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 97675238 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 235696000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_accesses 98713473 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 27258.057090 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 23994.339402 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 98704785 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 236818000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000088 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 8639 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 73 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 205809000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000088 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 8566 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.ReadReq_misses 8688 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 120 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 205583500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000087 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 8568 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 11402.666122 # Average number of references to valid blocks.
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets 1000 # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 11520.166317 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 1 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 1000 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 97683877 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 27282.787360 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 97675238 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 235696000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_accesses 98713473 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 27258.057090 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 98704785 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 236818000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.000088 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 8639 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 73 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 205809000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000088 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 8566 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.demand_misses 8688 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 120 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 205583500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000087 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 8568 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.overall_accesses 97683877 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 27282.787360 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 24026.266636 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_accesses 98713473 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 27258.057090 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 23994.339402 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 97675238 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 235696000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_hits 98704785 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 236818000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.000088 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 8639 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 73 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 205809000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000088 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 8566 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_misses 8688 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 120 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 205583500 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000087 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 8568 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.replacements 6732 # number of replacements
|
||||
system.cpu.icache.sampled_refs 8566 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.replacements 6734 # number of replacements
|
||||
system.cpu.icache.sampled_refs 8568 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 1428.614683 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 97675238 # Total number of references to valid blocks.
|
||||
system.cpu.icache.tagsinuse 1428.229557 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 98704785 # Total number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.icache_port.instReqsProcessed 97683876 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.ipc 0.453761 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.453761 # IPC: Total IPC of All Threads
|
||||
system.cpu.icache_port.instReqsProcessed 98713472 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.idleCycles 6417911 # Number of cycles cpu's stages were not processed
|
||||
system.cpu.ipc 0.466339 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.466339 # IPC: Total IPC of All Threads
|
||||
system.cpu.itb.data_accesses 0 # DTB accesses
|
||||
system.cpu.itb.data_acv 0 # DTB access violations
|
||||
system.cpu.itb.data_hits 0 # DTB hits
|
||||
system.cpu.itb.data_misses 0 # DTB misses
|
||||
system.cpu.itb.fetch_accesses 97683924 # ITB accesses
|
||||
system.cpu.itb.fetch_accesses 98713520 # ITB accesses
|
||||
system.cpu.itb.fetch_acv 0 # ITB acv
|
||||
system.cpu.itb.fetch_hits 97683877 # ITB hits
|
||||
system.cpu.itb.fetch_hits 98713473 # ITB hits
|
||||
system.cpu.itb.fetch_misses 47 # ITB misses
|
||||
system.cpu.itb.read_accesses 0 # DTB read accesses
|
||||
system.cpu.itb.read_acv 0 # DTB read access violations
|
||||
|
@ -185,84 +185,100 @@ system.cpu.itb.write_acv 0 # DT
|
|||
system.cpu.itb.write_hits 0 # DTB write hits
|
||||
system.cpu.itb.write_misses 0 # DTB write misses
|
||||
system.cpu.l2cache.ReadExReq_accesses 1748 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52413.043478 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40003.432494 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 91618000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52296.624714 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40005.720824 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 91414500 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 1748 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 69926000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 69930000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 1748 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 9041 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52240.613777 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40013.548808 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 5978 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 160013000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.338790 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_accesses 9043 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52161.443030 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40020.078355 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 5980 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 159770500 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.338715 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 3063 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 122561500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338790 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 122581500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.338715 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 3063 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.UpgradeReq_accesses 111 # number of UpgradeReq accesses(hits+misses)
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52414.414414 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 5818000 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52216.216216 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40009.009009 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 5796000 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_misses 111 # number of UpgradeReq misses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4440000 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 4441000 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_misses 111 # number of UpgradeReq MSHR misses
|
||||
system.cpu.l2cache.Writeback_accesses 104 # number of Writeback accesses(hits+misses)
|
||||
system.cpu.l2cache.Writeback_hits 104 # number of Writeback hits
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_refs 1.968317 # Average number of references to valid blocks.
|
||||
system.cpu.l2cache.avg_refs 1.968977 # Average number of references to valid blocks.
|
||||
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 10789 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52303.263355 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 5978 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 251631000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.445917 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_accesses 10791 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52210.559135 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 5980 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 251185000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.445834 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 4811 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 192487500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.445917 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 192511500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.445834 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 4811 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.overall_accesses 10789 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52303.263355 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40009.873207 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_accesses 10791 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52210.559135 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40014.861775 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 5978 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 251631000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.445917 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_hits 5980 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 251185000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.445834 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 4811 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 192487500 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.445917 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 192511500 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.445834 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 4811 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.l2cache.replacements 0 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 3030 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 2039.371088 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 5964 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.tagsinuse 2038.814805 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 5966 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 0 # number of writebacks
|
||||
system.cpu.numCycles 202536123 # number of cpu cycles simulated
|
||||
system.cpu.numCycles 197073489 # number of cpu cycles simulated
|
||||
system.cpu.runCycles 190655578 # Number of cycles cpu stages are processed.
|
||||
system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode
|
||||
system.cpu.smt_cpi no_value # CPI: Total SMT-CPI
|
||||
system.cpu.smt_ipc no_value # IPC: Total SMT-IPC
|
||||
system.cpu.threadCycles 202536123 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.stage-0.idleCycles 98359969 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-0.runCycles 98713520 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-0.utilization 50.089700 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-1.idleCycles 104416341 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-1.runCycles 92657148 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-1.utilization 47.016546 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-2.idleCycles 103581004 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-2.runCycles 93492485 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-2.utilization 47.440417 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-3.idleCycles 170536358 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-3.runCycles 26537131 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-3.utilization 13.465602 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-4.idleCycles 105170433 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-4.runCycles 91903056 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-4.utilization 46.633901 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.threadCycles 197073489 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.workload.PROG:num_syscalls 389 # Number of system calls
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
|
@ -63,6 +63,7 @@ progress_interval=0
|
|||
stageTracing=false
|
||||
stageWidth=1
|
||||
system=system
|
||||
threadModel=SMT
|
||||
tracer=system.cpu.tracer
|
||||
workload=system.cpu.workload
|
||||
dcache_port=system.cpu.dcache.cpu_side
|
||||
|
@ -78,7 +79,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -113,7 +113,6 @@ hash_delay=1
|
|||
latency=1000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=10000
|
||||
|
@ -148,7 +147,6 @@ hash_delay=1
|
|||
latency=10000
|
||||
max_miss_count=0
|
||||
mshrs=10
|
||||
prefetch_cache_check_push=true
|
||||
prefetch_data_accesses_only=false
|
||||
prefetch_degree=1
|
||||
prefetch_latency=100000
|
||||
|
@ -190,7 +188,7 @@ egid=100
|
|||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=tests/test-progs/hello/bin/alpha/linux/hello
|
||||
executable=/dist/m5/regression/test-progs/hello/bin/alpha/linux/hello
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
|
|
@ -5,13 +5,13 @@ The Regents of The University of Michigan
|
|||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled May 12 2009 11:18:39
|
||||
M5 revision 21550d38f156 6195 default qtip tip inorder-hello-regress
|
||||
M5 started May 12 2009 11:18:40
|
||||
M5 compiled Jan 29 2010 09:13:03
|
||||
M5 revision 23ae96d82d21+ 6704+ default qtip tip inorder_hello_alpha
|
||||
M5 started Jan 29 2010 09:13:04
|
||||
M5 executing on zooks
|
||||
command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
info: Increasing stack size by one page.
|
||||
Hello world!
|
||||
Exiting @ tick 31646000 because target called exit()
|
||||
Exiting @ tick 31286000 because target called exit()
|
||||
|
|
|
@ -1,53 +1,53 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 23793 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 152032 # Number of bytes of host memory used
|
||||
host_seconds 0.27 # Real time elapsed on the host
|
||||
host_tick_rate 117464960 # Simulator tick rate (ticks/s)
|
||||
host_inst_rate 23048 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 153228 # Number of bytes of host memory used
|
||||
host_seconds 0.28 # Real time elapsed on the host
|
||||
host_tick_rate 112412599 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 6404 # Number of instructions simulated
|
||||
sim_seconds 0.000032 # Number of seconds simulated
|
||||
sim_ticks 31646000 # Number of ticks simulated
|
||||
sim_seconds 0.000031 # Number of seconds simulated
|
||||
sim_ticks 31286000 # Number of ticks simulated
|
||||
system.cpu.AGEN-Unit.instReqsProcessed 2050 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 909 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 142 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 6405 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 924 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 143 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 6581 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.cyclesExecuted 4340 # Number of Cycles Execution Unit was used.
|
||||
system.cpu.Execution-Unit.instReqsProcessed 4354 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 607 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 124 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 13560 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 608 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 123 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Execution-Unit.utilization 0.069359 # Utilization of Execution Unit (cycles / totalCycles).
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 13858 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Graduation-Unit.instReqsProcessed 6404 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.divInstReqsProcessed 0 # Number of Divide Requests Processed.
|
||||
system.cpu.Mult-Div-Unit.instReqsProcessed 2 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.multInstReqsProcessed 1 # Number of Multiply Requests Processed.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 12884 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 19961 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.activity 22.407428 # Percentage of cycles cpu is active
|
||||
system.cpu.committedInsts 6404 # Number of Instructions Simulated (Per-Thread)
|
||||
system.cpu.committedInsts_total 6404 # Number of Instructions Simulated (Total)
|
||||
system.cpu.cpi 9.883354 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 9.883354 # CPI: Total CPI of All Threads
|
||||
system.cpu.contextSwitches 1 # Number of context switches
|
||||
system.cpu.cpi 9.770924 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 9.770924 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 1185 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 56352.631579 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53352.631579 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 56347.368421 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53347.368421 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 1090 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 5353500 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_latency 5353000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.080169 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 95 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 5068500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 5068000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.080169 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 95 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 865 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56419.540230 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53419.540230 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56074.712644 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53074.712644 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 778 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 4908500 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 4878500 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.100578 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 87 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 4647500 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 4617500 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.100578 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 87 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
|
@ -59,29 +59,29 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 #
|
|||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 2050 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 56384.615385 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_avg_miss_latency 56217.032967 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 1868 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 10262000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_latency 10231500 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.088780 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 182 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 9716000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_latency 9685500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.088780 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 182 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.overall_accesses 2050 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 56384.615385 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 53384.615385 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_miss_latency 56217.032967 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 53217.032967 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 1868 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 10262000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_latency 10231500 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.088780 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 182 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 9716000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_latency 9685500 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.088780 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 182 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -89,7 +89,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.dcache.replacements 0 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 168 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 104.325446 # Cycle average of tags in use
|
||||
system.cpu.dcache.tagsinuse 103.689640 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 1882 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 0 # number of writebacks
|
||||
|
@ -110,70 +110,71 @@ system.cpu.dtb.write_accesses 868 # DT
|
|||
system.cpu.dtb.write_acv 0 # DTB write access violations
|
||||
system.cpu.dtb.write_hits 865 # DTB write hits
|
||||
system.cpu.dtb.write_misses 3 # DTB write misses
|
||||
system.cpu.icache.ReadReq_accesses 7155 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 55763.605442 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 52949.122807 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 6861 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 16394500 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.041090 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 294 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 9 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 15090500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.039832 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_accesses 7277 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 55521.594684 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 52863.157895 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 6976 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 16712000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.041363 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 301 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_hits 16 # number of ReadReq MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 15066000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.039164 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 285 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 24.158451 # Average number of references to valid blocks.
|
||||
system.cpu.icache.avg_refs 24.563380 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 7155 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 55763.605442 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 6861 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 16394500 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.041090 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 294 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 9 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 15090500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.039832 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_accesses 7277 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 55521.594684 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 6976 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 16712000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.041363 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 301 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 16 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 15066000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.039164 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 285 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.overall_accesses 7155 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 55763.605442 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 52949.122807 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_accesses 7277 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 55521.594684 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 52863.157895 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 6861 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 16394500 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.041090 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 294 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 9 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 15090500 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.039832 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_hits 6976 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 16712000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.041363 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 301 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 16 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 15066000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.039164 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 285 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.replacements 0 # number of replacements
|
||||
system.cpu.icache.sampled_refs 284 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 131.383181 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 6861 # Total number of references to valid blocks.
|
||||
system.cpu.icache.tagsinuse 130.373495 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 6976 # Total number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.icache_port.instReqsProcessed 7153 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.ipc 0.101180 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.101180 # IPC: Total IPC of All Threads
|
||||
system.cpu.icache_port.instReqsProcessed 7275 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.idleCycles 48552 # Number of cycles cpu's stages were not processed
|
||||
system.cpu.ipc 0.102344 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.102344 # IPC: Total IPC of All Threads
|
||||
system.cpu.itb.data_accesses 0 # DTB accesses
|
||||
system.cpu.itb.data_acv 0 # DTB access violations
|
||||
system.cpu.itb.data_hits 0 # DTB hits
|
||||
system.cpu.itb.data_misses 0 # DTB misses
|
||||
system.cpu.itb.fetch_accesses 7172 # ITB accesses
|
||||
system.cpu.itb.fetch_accesses 7294 # ITB accesses
|
||||
system.cpu.itb.fetch_acv 0 # ITB acv
|
||||
system.cpu.itb.fetch_hits 7155 # ITB hits
|
||||
system.cpu.itb.fetch_hits 7277 # ITB hits
|
||||
system.cpu.itb.fetch_misses 17 # ITB misses
|
||||
system.cpu.itb.read_accesses 0 # DTB read accesses
|
||||
system.cpu.itb.read_acv 0 # DTB read access violations
|
||||
|
@ -184,28 +185,28 @@ system.cpu.itb.write_acv 0 # DT
|
|||
system.cpu.itb.write_hits 0 # DTB write hits
|
||||
system.cpu.itb.write_misses 0 # DTB write misses
|
||||
system.cpu.l2cache.ReadExReq_accesses 73 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52424.657534 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52075.342466 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40013.698630 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3827000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3801500 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 73 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2921000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 73 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 380 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52118.733509 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39944.591029 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52068.601583 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39945.910290 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 19753000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_latency 19734000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.997368 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 379 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 15139000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 15139500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.997368 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 379 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses)
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52357.142857 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52071.428571 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 733000 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 729000 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 560000 # number of UpgradeReq MSHR miss cycles
|
||||
|
@ -220,29 +221,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 #
|
|||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 453 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52168.141593 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52069.690265 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 23580000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 23535500 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.997792 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 452 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 18060000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 18060500 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.997792 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 452 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.overall_accesses 453 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52168.141593 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 39955.752212 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52069.690265 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 39956.858407 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 1 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 23580000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 23535500 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.997792 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 452 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 18060000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 18060500 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.997792 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 452 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -250,16 +251,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.l2cache.replacements 0 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 364 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 182.840902 # Cycle average of tags in use
|
||||
system.cpu.l2cache.tagsinuse 181.532273 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 0 # number of writebacks
|
||||
system.cpu.numCycles 63293 # number of cpu cycles simulated
|
||||
system.cpu.numCycles 62573 # number of cpu cycles simulated
|
||||
system.cpu.runCycles 14021 # Number of cycles cpu stages are processed.
|
||||
system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode
|
||||
system.cpu.smt_cpi no_value # CPI: Total SMT-CPI
|
||||
system.cpu.smt_ipc no_value # IPC: Total SMT-IPC
|
||||
system.cpu.threadCycles 63293 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.stage-0.idleCycles 55279 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-0.runCycles 7294 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-0.utilization 11.656785 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-1.idleCycles 55992 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-1.runCycles 6581 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-1.utilization 10.517316 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-2.idleCycles 56103 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-2.runCycles 6470 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-2.utilization 10.339923 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-3.idleCycles 60520 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-3.runCycles 2053 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-3.utilization 3.280968 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-4.idleCycles 56169 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-4.runCycles 6404 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-4.utilization 10.234446 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.threadCycles 62573 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.workload.PROG:num_syscalls 17 # Number of system calls
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
|
@ -117,6 +117,7 @@ progress_interval=0
|
|||
stageTracing=false
|
||||
stageWidth=1
|
||||
system=system
|
||||
threadModel=SMT
|
||||
tracer=system.cpu.tracer
|
||||
workload=system.cpu.workload
|
||||
dcache_port=system.cpu.dcache.cpu_side
|
||||
|
|
|
@ -5,13 +5,13 @@ The Regents of The University of Michigan
|
|||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Jan 2 2010 07:01:31
|
||||
M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
|
||||
M5 started Jan 2 2010 07:03:09
|
||||
M5 executing on fajita
|
||||
command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing
|
||||
M5 compiled Jan 31 2010 17:08:14
|
||||
M5 revision 01508015f86b 6964 default qtip tip inorder_hello_mips
|
||||
M5 started Jan 31 2010 17:08:15
|
||||
M5 executing on zooks
|
||||
command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
info: Increasing stack size by one page.
|
||||
Hello World!
|
||||
Exiting @ tick 29940500 because target called exit()
|
||||
Exiting @ tick 29206500 because target called exit()
|
||||
|
|
|
@ -1,96 +1,96 @@
|
|||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 10400 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 205896 # Number of bytes of host memory used
|
||||
host_seconds 0.56 # Real time elapsed on the host
|
||||
host_tick_rate 53415864 # Simulator tick rate (ticks/s)
|
||||
host_inst_rate 19644 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 155856 # Number of bytes of host memory used
|
||||
host_seconds 0.30 # Real time elapsed on the host
|
||||
host_tick_rate 98307932 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 5827 # Number of instructions simulated
|
||||
sim_seconds 0.000030 # Number of seconds simulated
|
||||
sim_ticks 29940500 # Number of ticks simulated
|
||||
sim_seconds 0.000029 # Number of seconds simulated
|
||||
sim_ticks 29206500 # Number of ticks simulated
|
||||
system.cpu.AGEN-Unit.instReqsProcessed 2090 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Branch-Predictor.predictedNotTaken 826 # Number of Branches Predicted As Not Taken (False).
|
||||
system.cpu.Branch-Predictor.predictedTaken 90 # Number of Branches Predicted As Taken (True).
|
||||
system.cpu.Decode-Unit.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.cyclesExecuted 3725 # Number of Cycles Execution Unit was used.
|
||||
system.cpu.Execution-Unit.instReqsProcessed 3734 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.predictedNotTakenIncorrect 541 # Number of Branches Incorrectly Predicted As Not Taken).
|
||||
system.cpu.Execution-Unit.predictedTakenIncorrect 35 # Number of Branches Incorrectly Predicted As Taken.
|
||||
system.cpu.Fetch-Buffer-T0.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T0.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Buffer-T1.instReqsProcessed 0 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Fetch-Buffer-T1.instsBypassed 0 # Number of Instructions Bypassed.
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 11657 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Execution-Unit.utilization 0.063769 # Utilization of Execution Unit (cycles / totalCycles).
|
||||
system.cpu.Fetch-Seq-Unit.instReqsProcessed 11702 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Graduation-Unit.instReqsProcessed 5827 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.divInstReqsProcessed 1 # Number of Divide Requests Processed.
|
||||
system.cpu.Mult-Div-Unit.instReqsProcessed 8 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.Mult-Div-Unit.multInstReqsProcessed 3 # Number of Multiply Requests Processed.
|
||||
system.cpu.RegFile-Manager.instReqsProcessed 10713 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.activity 20.277673 # Percentage of cycles cpu is active
|
||||
system.cpu.committedInsts 5827 # Number of Instructions Simulated (Per-Thread)
|
||||
system.cpu.committedInsts_total 5827 # Number of Instructions Simulated (Total)
|
||||
system.cpu.cpi 10.276643 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 10.276643 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 1165 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 1078 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 4889500 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.074678 # miss rate for ReadReq accesses
|
||||
system.cpu.contextSwitches 1 # Number of context switches
|
||||
system.cpu.cpi 10.024713 # CPI: Cycles Per Instruction (Per-Thread)
|
||||
system.cpu.cpi_total 10.024713 # CPI: Total CPI of All Threads
|
||||
system.cpu.dcache.ReadReq_accesses 1164 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 56229.885057 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53229.885057 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 1077 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 4892000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.074742 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 87 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 4628500 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.074678 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 4631000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.074742 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 87 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 56265.625000 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53265.625000 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 861 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 3619500 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 3601000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.069189 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 3427500 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 3409000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.069189 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 14.144928 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 14.137681 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 2090 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 56350.993377 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 1939 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 8509000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.072249 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_accesses 2089 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 56245.033113 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 1938 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 8493000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.072283 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 151 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8056000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.072249 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8040000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.072283 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 151 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.overall_accesses 2090 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 56350.993377 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_accesses 2089 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 56245.033113 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 53245.033113 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 1939 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 8509000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.072249 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_hits 1938 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 8493000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.072283 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 151 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8056000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.072249 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8040000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.072283 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 151 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.dcache.replacements 0 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 88.212490 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 1952 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.tagsinuse 88.491296 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 1951 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 0 # number of writebacks
|
||||
system.cpu.dcache_port.instReqsProcessed 2089 # Number of Instructions Requests that completed in this resource.
|
||||
|
@ -103,62 +103,63 @@ system.cpu.dtb.read_misses 0 # DT
|
|||
system.cpu.dtb.write_accesses 0 # DTB write accesses
|
||||
system.cpu.dtb.write_hits 0 # DTB write hits
|
||||
system.cpu.dtb.write_misses 0 # DTB write misses
|
||||
system.cpu.icache.ReadReq_accesses 5829 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 55765.676568 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 5526 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 16897000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.051981 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_accesses 5874 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 55801.980198 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 52801.980198 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 5571 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 16908000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.051583 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 303 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 15988000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.051981 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 15999000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.051583 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 303 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 18.237624 # Average number of references to valid blocks.
|
||||
system.cpu.icache.avg_refs 18.386139 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 5829 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 55765.676568 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 5526 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 16897000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.051981 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_accesses 5874 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 55801.980198 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 5571 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 16908000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.051583 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 303 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 15988000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.051981 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_miss_latency 15999000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.051583 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 303 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.overall_accesses 5829 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 55765.676568 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_accesses 5874 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 55801.980198 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 52801.980198 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 5526 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 16897000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.051981 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_hits 5571 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 16908000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.051583 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 303 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 15988000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.051981 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_miss_latency 15999000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.051583 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 303 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.replacements 13 # number of replacements
|
||||
system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 134.267603 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 5526 # Total number of references to valid blocks.
|
||||
system.cpu.icache.tagsinuse 135.362853 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 5571 # Total number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.icache_port.instReqsProcessed 5828 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.ipc 0.097308 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.097308 # IPC: Total IPC of All Threads
|
||||
system.cpu.icache_port.instReqsProcessed 5873 # Number of Instructions Requests that completed in this resource.
|
||||
system.cpu.idleCycles 46569 # Number of cycles cpu's stages were not processed
|
||||
system.cpu.ipc 0.099753 # IPC: Instructions Per Cycle (Per-Thread)
|
||||
system.cpu.ipc_total 0.099753 # IPC: Total IPC of All Threads
|
||||
system.cpu.itb.accesses 0 # DTB accesses
|
||||
system.cpu.itb.hits 0 # DTB hits
|
||||
system.cpu.itb.misses 0 # DTB misses
|
||||
|
@ -169,31 +170,31 @@ system.cpu.itb.write_accesses 0 # DT
|
|||
system.cpu.itb.write_hits 0 # DTB write hits
|
||||
system.cpu.itb.write_misses 0 # DTB write misses
|
||||
system.cpu.l2cache.ReadExReq_accesses 51 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52500 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52264.705882 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 2677500 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 2665500 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 51 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2045000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 390 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52091.494845 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40048.969072 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 20196500 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_latency 20211500 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.994872 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 388 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 15529000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 15539000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994872 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 388 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses)
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 683000 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_avg_miss_latency 52192.307692 # average UpgradeReq miss latency
|
||||
system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40153.846154 # average UpgradeReq mshr miss latency
|
||||
system.cpu.l2cache.UpgradeReq_miss_latency 678500 # number of UpgradeReq miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 521000 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_latency 522000 # number of UpgradeReq MSHR miss cycles
|
||||
system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses
|
||||
system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR misses
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
|
@ -205,29 +206,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 #
|
|||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52104.783599 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52111.617312 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 22874000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 22877000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.995465 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 439 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 17574000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 17584000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.995465 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 439 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52104.783599 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52111.617312 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40054.669704 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 2 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 22874000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 22877000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.995465 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 439 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 17574000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 17584000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.995465 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 439 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
|
@ -235,16 +236,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0
|
|||
system.cpu.l2cache.replacements 0 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 375 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 185.807591 # Cycle average of tags in use
|
||||
system.cpu.l2cache.tagsinuse 187.032260 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 0 # number of writebacks
|
||||
system.cpu.numCycles 59882 # number of cpu cycles simulated
|
||||
system.cpu.numCycles 58414 # number of cpu cycles simulated
|
||||
system.cpu.runCycles 11845 # Number of cycles cpu stages are processed.
|
||||
system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
|
||||
system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode
|
||||
system.cpu.smt_cpi no_value # CPI: Total SMT-CPI
|
||||
system.cpu.smt_ipc no_value # IPC: Total SMT-IPC
|
||||
system.cpu.threadCycles 59882 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.stage-0.idleCycles 52540 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-0.runCycles 5874 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-0.utilization 10.055809 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-1.idleCycles 52586 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-1.runCycles 5828 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-1.utilization 9.977060 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-2.idleCycles 52582 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-2.runCycles 5832 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-2.utilization 9.983908 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-3.idleCycles 56324 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-3.runCycles 2090 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-3.utilization 3.577909 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.stage-4.idleCycles 52587 # Number of cycles 0 instructions are processed.
|
||||
system.cpu.stage-4.runCycles 5827 # Number of cycles 1+ instructions are processed.
|
||||
system.cpu.stage-4.utilization 9.975348 # Percentage of cycles stage was utilized (processing insts).
|
||||
system.cpu.threadCycles 58414 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
|
||||
system.cpu.workload.PROG:num_syscalls 8 # Number of system calls
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
|
Loading…
Reference in a new issue