Support profiling.
--HG-- extra : convert_revision : eab02dea68442bd3f8c5d1d16b7f93f43cbda2a5
This commit is contained in:
parent
74e8abd37e
commit
ad2fa1e1c9
7 changed files with 100 additions and 14 deletions
|
@ -153,15 +153,6 @@ AlphaFullCPU<Impl>::regStats()
|
|||
this->commit.regStats();
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
template <class Impl>
|
||||
void
|
||||
AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
|
||||
{
|
||||
// Currently not supported
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
|
||||
|
@ -334,15 +325,26 @@ AlphaFullCPU<Impl>::AlphaXC::readLastSuspend()
|
|||
return thread->lastSuspend;
|
||||
}
|
||||
|
||||
// Forwards the function-profile dump request to this context's thread state.
template <class Impl>
|
||||
void
|
||||
AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
|
||||
{
|
||||
thread->dumpFuncProfile();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
AlphaFullCPU<Impl>::AlphaXC::profileClear()
|
||||
{}
|
||||
{
|
||||
thread->profileClear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
AlphaFullCPU<Impl>::AlphaXC::profileSample()
|
||||
{}
|
||||
{
|
||||
thread->profileSample();
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class Impl>
|
||||
|
|
|
@ -1035,6 +1035,20 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
|||
|
||||
updateComInstStats(head_inst);
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread[tid]->profile) {
|
||||
// bool usermode =
|
||||
// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
|
||||
// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
|
||||
thread[tid]->profilePC = head_inst->readPC();
|
||||
ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
|
||||
head_inst->staticInst);
|
||||
|
||||
if (node)
|
||||
thread[tid]->profileNode = node;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (head_inst->traceData) {
|
||||
head_inst->traceData->setFetchSeq(head_inst->seqNum);
|
||||
head_inst->traceData->setCPSeq(thread[tid]->numInst);
|
||||
|
|
|
@ -31,8 +31,11 @@
|
|||
|
||||
#include "arch/faults.hh"
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/exec_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class Event;
|
||||
class Process;
|
||||
|
@ -83,8 +86,22 @@ struct O3ThreadState : public ThreadState {
|
|||
#if FULL_SYSTEM
|
||||
O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
|
||||
: ThreadState(-1, _thread_num, _mem),
|
||||
inSyscall(0), trapPending(0)
|
||||
{ }
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
{
|
||||
if (cpu->params->profile) {
|
||||
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<O3ThreadState,
|
||||
&O3ThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
}
|
||||
#else
|
||||
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
|
||||
: ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
|
||||
|
@ -138,6 +155,14 @@ struct O3ThreadState : public ThreadState {
|
|||
/** Handles the syscall. */
|
||||
void syscall() { process->syscall(xcProxy); }
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
/**
 * Creates an output stream named "profile.<cpu name>.dat" through simout
 * and dumps the function profile into it, passing xcProxy along.
 * NOTE(review): profile is dereferenced without the null check used by
 * profileClear()/profileSample(); the constructor only allocates it when
 * cpu->params->profile is set -- confirm this is never reached with
 * profiling disabled.
 */
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
profile->dump(xcProxy, *os);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_THREAD_STATE_HH__
|
||||
|
|
|
@ -31,9 +31,12 @@
|
|||
|
||||
#include "arch/faults.hh"
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/exec_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/process.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class Event;
|
||||
//class Process;
|
||||
|
@ -62,9 +65,22 @@ struct OzoneThreadState : public ThreadState {
|
|||
#if FULL_SYSTEM
|
||||
OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
|
||||
: ThreadState(-1, _thread_num, _mem),
|
||||
inSyscall(0), trapPending(0)
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
{
|
||||
memset(&regs, 0, sizeof(TheISA::RegFile));
|
||||
if (cpu->params->profile) {
|
||||
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<OzoneThreadState,
|
||||
&OzoneThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
}
|
||||
#else
|
||||
OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
|
||||
|
@ -187,6 +203,14 @@ struct OzoneThreadState : public ThreadState {
|
|||
Counter readFuncExeInst() { return funcExeInst; }
|
||||
|
||||
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
|
||||
|
||||
#if FULL_SYSTEM
|
||||
/**
 * Creates an output stream named "profile.<cpu name>.dat" through simout
 * and dumps the function profile into it, passing xcProxy along.
 * NOTE(review): profile is dereferenced without the null check used by
 * profileClear()/profileSample(); the constructor only allocates it when
 * cpu->params->profile is set -- confirm this is never reached with
 * profiling disabled.
 */
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
profile->dump(xcProxy, *os);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __CPU_OZONE_THREAD_STATE_HH__
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#define __CPU_THREAD_STATE_HH__
|
||||
|
||||
#include "cpu/exec_context.hh"
|
||||
#include "cpu/profile.hh"
|
||||
|
||||
#if FULL_SYSTEM
|
||||
class EndQuiesceEvent;
|
||||
|
@ -103,6 +104,20 @@ struct ThreadState {
|
|||
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
/** Calls clear() on the function profile; does nothing if no profile is set. */
void profileClear()
|
||||
{
|
||||
if (profile)
|
||||
profile->clear();
|
||||
}
|
||||
|
||||
/**
 * Passes the currently stored profileNode and profilePC to the profile's
 * sample(); does nothing if no profile is set.
 */
void profileSample()
|
||||
{
|
||||
if (profile)
|
||||
profile->sample(profileNode, profilePC);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Temporary storage to pass the source address from copy_load to
|
||||
* copy_store.
|
||||
|
|
|
@ -10,6 +10,8 @@ class DerivAlphaFullCPU(BaseCPU):
|
|||
mem = Param.FunctionalMemory(NULL, "memory")
|
||||
|
||||
checker = Param.BaseCPU(NULL, "checker")
|
||||
if build_env['FULL_SYSTEM']:
|
||||
profile = Param.Latency('0ns', "trace the kernel stack")
|
||||
|
||||
cachePorts = Param.Unsigned("Cache Ports")
|
||||
|
||||
|
|
|
@ -10,9 +10,12 @@ class DerivOzoneCPU(BaseCPU):
|
|||
mem = Param.FunctionalMemory(NULL, "memory")
|
||||
|
||||
checker = Param.BaseCPU("Checker CPU")
|
||||
if build_env['FULL_SYSTEM']:
|
||||
profile = Param.Latency('0ns', "trace the kernel stack")
|
||||
|
||||
width = Param.Unsigned("Width")
|
||||
frontEndWidth = Param.Unsigned("Front end width")
|
||||
frontEndLatency = Param.Unsigned("Front end latency")
|
||||
backEndWidth = Param.Unsigned("Back end width")
|
||||
backEndSquashLatency = Param.Unsigned("Back end squash latency")
|
||||
backEndLatency = Param.Unsigned("Back end latency")
|
||||
|
@ -75,6 +78,7 @@ class DerivOzoneCPU(BaseCPU):
|
|||
|
||||
LQEntries = Param.Unsigned("Number of load queue entries")
|
||||
SQEntries = Param.Unsigned("Number of store queue entries")
|
||||
lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
|
||||
LFSTSize = Param.Unsigned("Last fetched store table size")
|
||||
SSITSize = Param.Unsigned("Store set ID table size")
|
||||
|
||||
|
|
Loading…
Reference in a new issue