From ad2fa1e1c9587e8c2a2b7f3e5a9c592312042eb4 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 24 Aug 2006 17:43:08 -0400 Subject: [PATCH] Support profiling. --HG-- extra : convert_revision : eab02dea68442bd3f8c5d1d16b7f93f43cbda2a5 --- cpu/o3/alpha_cpu_impl.hh | 24 +++++++++++++----------- cpu/o3/commit_impl.hh | 14 ++++++++++++++ cpu/o3/thread_state.hh | 29 +++++++++++++++++++++++++++-- cpu/ozone/thread_state.hh | 26 +++++++++++++++++++++++++- cpu/thread_state.hh | 15 +++++++++++++++ python/m5/objects/AlphaFullCPU.py | 2 ++ python/m5/objects/OzoneCPU.py | 4 ++++ 7 files changed, 100 insertions(+), 14 deletions(-) diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh index 1bf0652cd..071a870ef 100644 --- a/cpu/o3/alpha_cpu_impl.hh +++ b/cpu/o3/alpha_cpu_impl.hh @@ -153,15 +153,6 @@ AlphaFullCPU::regStats() this->commit.regStats(); } -#if FULL_SYSTEM -template -void -AlphaFullCPU::AlphaXC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - template void AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) @@ -334,15 +325,26 @@ AlphaFullCPU::AlphaXC::readLastSuspend() return thread->lastSuspend; } +template +void +AlphaFullCPU::AlphaXC::dumpFuncProfile() +{ + thread->dumpFuncProfile(); +} + template void AlphaFullCPU::AlphaXC::profileClear() -{} +{ + thread->profileClear(); +} template void AlphaFullCPU::AlphaXC::profileSample() -{} +{ + thread->profileSample(); +} #endif template diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh index 364e685c2..cd10ec6b2 100644 --- a/cpu/o3/commit_impl.hh +++ b/cpu/o3/commit_impl.hh @@ -1035,6 +1035,20 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) updateComInstStats(head_inst); +#if FULL_SYSTEM + if (thread[tid]->profile) { +// bool usermode = +// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0; +// thread[tid]->profilePC = usermode ? 
1 : head_inst->readPC(); + thread[tid]->profilePC = head_inst->readPC(); + ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(), + head_inst->staticInst); + + if (node) + thread[tid]->profileNode = node; + } +#endif + if (head_inst->traceData) { head_inst->traceData->setFetchSeq(head_inst->seqNum); head_inst->traceData->setCPSeq(thread[tid]->numInst); diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh index 3f1208ea0..28f488143 100644 --- a/cpu/o3/thread_state.hh +++ b/cpu/o3/thread_state.hh @@ -31,8 +31,11 @@ #include "arch/faults.hh" #include "arch/isa_traits.hh" +#include "base/callback.hh" +#include "base/output.hh" #include "cpu/exec_context.hh" #include "cpu/thread_state.hh" +#include "sim/sim_exit.hh" class Event; class Process; @@ -83,8 +86,22 @@ struct O3ThreadState : public ThreadState { #if FULL_SYSTEM O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) : ThreadState(-1, _thread_num, _mem), - inSyscall(0), trapPending(0) - { } + cpu(_cpu), inSyscall(0), trapPending(0) + { + if (cpu->params->profile) { + profile = new FunctionProfile(cpu->params->system->kernelSymtab); + Callback *cb = + new MakeCallback<O3ThreadState, + &O3ThreadState::dumpFuncProfile>(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. + static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; + } #else O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid), @@ -138,6 +155,14 @@ struct O3ThreadState : public ThreadState { /** Handles the syscall. 
*/ void syscall() { process->syscall(xcProxy); } #endif + +#if FULL_SYSTEM + void dumpFuncProfile() + { + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(xcProxy, *os); + } +#endif }; #endif // __CPU_O3_THREAD_STATE_HH__ diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh index f104dff23..93a56da1b 100644 --- a/cpu/ozone/thread_state.hh +++ b/cpu/ozone/thread_state.hh @@ -31,9 +31,12 @@ #include "arch/faults.hh" #include "arch/isa_traits.hh" +#include "base/callback.hh" +#include "base/output.hh" #include "cpu/exec_context.hh" #include "cpu/thread_state.hh" #include "sim/process.hh" +#include "sim/sim_exit.hh" class Event; //class Process; @@ -62,9 +65,22 @@ struct OzoneThreadState : public ThreadState { #if FULL_SYSTEM OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) : ThreadState(-1, _thread_num, _mem), - inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0) { memset(&regs, 0, sizeof(TheISA::RegFile)); + if (cpu->params->profile) { + profile = new FunctionProfile(cpu->params->system->kernelSymtab); + Callback *cb = + new MakeCallback<OzoneThreadState, + &OzoneThreadState::dumpFuncProfile>(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. 
+ static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; } #else OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) @@ -187,6 +203,14 @@ struct OzoneThreadState : public ThreadState { Counter readFuncExeInst() { return funcExeInst; } void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } + +#if FULL_SYSTEM + void dumpFuncProfile() + { + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(xcProxy, *os); + } +#endif }; #endif // __CPU_OZONE_THREAD_STATE_HH__ diff --git a/cpu/thread_state.hh b/cpu/thread_state.hh index 12146bd11..7a19963c8 100644 --- a/cpu/thread_state.hh +++ b/cpu/thread_state.hh @@ -30,6 +30,7 @@ #define __CPU_THREAD_STATE_HH__ #include "cpu/exec_context.hh" +#include "cpu/profile.hh" #if FULL_SYSTEM class EndQuiesceEvent; @@ -103,6 +104,20 @@ struct ThreadState { #endif +#if FULL_SYSTEM + void profileClear() + { + if (profile) + profile->clear(); + } + + void profileSample() + { + if (profile) + profile->sample(profileNode, profilePC); + } +#endif + /** * Temporary storage to pass the source address from copy_load to * copy_store. 
diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py index 015e9d872..5b6fa1063 100644 --- a/python/m5/objects/AlphaFullCPU.py +++ b/python/m5/objects/AlphaFullCPU.py @@ -10,6 +10,8 @@ class DerivAlphaFullCPU(BaseCPU): mem = Param.FunctionalMemory(NULL, "memory") checker = Param.BaseCPU(NULL, "checker") + if build_env['FULL_SYSTEM']: + profile = Param.Latency('0ns', "trace the kernel stack") cachePorts = Param.Unsigned("Cache Ports") diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py index ea8b6b537..dadca7990 100644 --- a/python/m5/objects/OzoneCPU.py +++ b/python/m5/objects/OzoneCPU.py @@ -10,9 +10,12 @@ class DerivOzoneCPU(BaseCPU): mem = Param.FunctionalMemory(NULL, "memory") checker = Param.BaseCPU("Checker CPU") + if build_env['FULL_SYSTEM']: + profile = Param.Latency('0ns', "trace the kernel stack") width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") + frontEndLatency = Param.Unsigned("Front end latency") backEndWidth = Param.Unsigned("Back end width") backEndSquashLatency = Param.Unsigned("Back end squash latency") backEndLatency = Param.Unsigned("Back end latency") @@ -75,6 +78,7 @@ class DerivOzoneCPU(BaseCPU): LQEntries = Param.Unsigned("Number of load queue entries") SQEntries = Param.Unsigned("Number of store queue entries") + lsqLimits = Param.Bool(True, "LSQ size limits dispatch") LFSTSize = Param.Unsigned("Last fetched store table size") SSITSize = Param.Unsigned("Store set ID table size")