Add new function profiling stuff, wrap the pc_sample stuff into it.

SConscript:
    Get rid of the pc_sample stuff and move to the new profiling stuff
base/traceflags.py:
    DPRINTF Stack stuff
cpu/base.cc:
cpu/base.hh:
cpu/exec_context.cc:
cpu/exec_context.hh:
cpu/simple/cpu.cc:
    Add profiling stuff
kern/kernel_stats.hh:
    Use a smart pointer
sim/system.cc:
sim/system.hh:
    Create a new symbol table that has all of the symbols for a
    particular system
util/stats/categories.py:
    change around the categories, add categories for function
    profiling stuff
util/stats/profile.py:
    New profile parsing and display code to deal with function
    profiling stuff, graph, dot, and text outputs.

--HG--
extra : convert_revision : b3de0cdc8bd468e42647966e2640ae009bda9eb8
This commit is contained in:
Nathan Binkert 2005-10-18 19:07:42 -04:00
parent 357ee7a845
commit a81c03737a
16 changed files with 1533 additions and 330 deletions

View file

@ -143,7 +143,6 @@ base_sources = Split('''
encumbered/cpu/full/issue.cc
encumbered/cpu/full/ls_queue.cc
encumbered/cpu/full/machine_queue.cc
encumbered/cpu/full/pc_sample_profile.cc
encumbered/cpu/full/pipetrace.cc
encumbered/cpu/full/readyq.cc
encumbered/cpu/full/reg_info.cc
@ -241,6 +240,7 @@ full_system_sources = Split('''
arch/alpha/ev5.cc
arch/alpha/osfpal.cc
arch/alpha/pseudo_inst.cc
arch/alpha/stacktrace.cc
arch/alpha/vtophys.cc
base/crc.cc
@ -248,6 +248,7 @@ full_system_sources = Split('''
base/remote_gdb.cc
cpu/intr_control.cc
cpu/profile.cc
dev/alpha_console.cc
dev/baddev.cc
@ -345,6 +346,7 @@ targetarch_files = Split('''
isa_traits.hh
osfpal.hh
pseudo_inst.hh
stacktrace.hh
vptr.hh
vtophys.hh
''')

324
arch/alpha/stacktrace.cc Normal file
View file

@ -0,0 +1,324 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include "arch/alpha/isa_traits.hh"
#include "arch/alpha/stacktrace.hh"
#include "arch/alpha/vtophys.hh"
#include "base/bitfield.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/exec_context.hh"
using namespace std;
ProcessInfo::ProcessInfo(ExecContext *_xc)
: xc(_xc)
{
Addr addr = 0;
if (!xc->system->kernelSymtab->findAddress("thread_info_size", addr))
panic("thread info not compiled into kernel\n");
thread_info_size = *(int32_t *)vtomem(xc, addr, sizeof(int32_t));
if (!xc->system->kernelSymtab->findAddress("task_struct_size", addr))
panic("thread info not compiled into kernel\n");
task_struct_size = *(int32_t *)vtomem(xc, addr, sizeof(int32_t));
if (!xc->system->kernelSymtab->findAddress("thread_info_task", addr))
panic("thread info not compiled into kernel\n");
task_off = *(int32_t *)vtomem(xc, addr, sizeof(int32_t));
if (!xc->system->kernelSymtab->findAddress("task_struct_pid", addr))
panic("thread info not compiled into kernel\n");
pid_off = *(int32_t *)vtomem(xc, addr, sizeof(int32_t));
if (!xc->system->kernelSymtab->findAddress("task_struct_comm", addr))
panic("thread info not compiled into kernel\n");
name_off = *(int32_t *)vtomem(xc, addr, sizeof(int32_t));
}
/**
 * Fetch the task pointer associated with a kernel stack pointer.
 *
 * The low 14 bits of ksp are masked off to get a base address, and the
 * pointer stored at base + task_off is read from simulated memory.
 *
 * @param ksp Kernel stack pointer.
 * @return The pointer read, or 0 when the base is 0xfffffc0000000000.
 */
Addr
ProcessInfo::task(Addr ksp) const
{
    const Addr stack_base = ksp & ~0x3fff;
    if (stack_base != ULL(0xfffffc0000000000)) {
        Addr task_ptr;
        CopyOut(xc, &task_ptr, stack_base + task_off, sizeof(task_ptr));
        return task_ptr;
    }

    return 0;
}
/**
 * Look up the process id for the task that owns a kernel stack.
 *
 * @param ksp Kernel stack pointer.
 * @return The 16-bit value read at task + pid_off, or -1 when task()
 *         yields no task.
 */
int
ProcessInfo::pid(Addr ksp) const
{
    const Addr task_ptr = this->task(ksp);
    if (task_ptr) {
        uint16_t value;
        CopyOut(xc, &value, task_ptr + pid_off, sizeof(value));
        return value;
    }

    return -1;
}
/**
 * Look up the command name for the task that owns a kernel stack.
 *
 * @param ksp Kernel stack pointer.
 * @return "console" when task() yields no task, "startup" when the
 *         string read at task + name_off is empty, otherwise that string.
 */
string
ProcessInfo::name(Addr ksp) const
{
    const Addr task_ptr = this->task(ksp);
    if (!task_ptr)
        return "console";

    char buf[256];
    CopyString(xc, buf, task_ptr + name_off, sizeof(buf));

    return buf[0] ? string(buf) : string("startup");
}
/*
 * Build a stack trace for the given execution context.
 *
 * Entries are pushed innermost-first.  Three small values are used as
 * markers instead of addresses; dump() prints them as:
 *   1 - "user", 2 - "console", 3 - "unknown"
 *
 * When is_call is true, the nearest symbol at or below npc is recorded
 * first and unwinding then continues from the current pc.
 */
StackTrace::StackTrace(ExecContext *_xc, bool is_call)
: xc(_xc)
{
// Any bit of mask 0x18 set in IPR_DTB_CM is treated as user mode.
bool usermode = (xc->regs.ipr[AlphaISA::IPR_DTB_CM] & 0x18) != 0;
Addr pc = xc->regs.npc;
bool kernel = xc->system->kernelStart <= pc && pc <= xc->system->kernelEnd;
// User mode: record the "user" marker only, no unwinding.
if (usermode) {
stack.push_back(1);
return;
}
// Not user mode and outside [kernelStart, kernelEnd]: record the
// "console" marker only.
if (!kernel) {
stack.push_back(2);
return;
}
SymbolTable *symtab = xc->system->allSymtab;
Addr ksp = xc->regs.intRegFile[TheISA::StackPointerReg];
// Unwinding stops (with a panic) once ksp is no longer above this
// 16KB-aligned address.
Addr bottom = ksp & ~0x3fff;
Addr addr;
if (is_call) {
// npc is the call target: push the function it falls in, then unwind
// the caller starting from the current pc.
if (!symtab->findNearestAddr(pc, addr))
panic("could not find address %#x", pc);
stack.push_back(addr);
pc = xc->regs.pc;
}
Addr ra;
int size;
while (ksp > bottom) {
// Map pc to the start address of the enclosing symbol.
if (!symtab->findNearestAddr(pc, addr))
panic("could not find symbol for pc=%#x", pc);
assert(pc >= addr && "symbol botch: callpc < func");
stack.push_back(addr);
// One of the addresses recognised by isEntry(): nothing further to unwind.
if (isEntry(addr))
return;
if (decodePrologue(ksp, pc, addr, size, ra)) {
// No saved return address was found before pc: stop here.
if (!ra)
return;
// Step to the caller: resume at the return address and skip this frame.
pc = ra;
ksp += size;
} else {
// Prologue decoding failed: terminate the trace with "unknown".
stack.push_back(3);
return;
}
// Stop once the return address leaves the kernel's address range.
bool kernel = xc->system->kernelStart <= pc &&
pc <= xc->system->kernelEnd;
if (!kernel)
return;
}
panic("unwinding too far");
}
// Nothing to release explicitly; the body is empty.
StackTrace::~StackTrace()
{
}
/**
 * Report whether addr equals the value currently held in any of the
 * PALtemp registers checked below (12, 7, 11, 21, 9, 2).  The
 * constructor stops unwinding when this returns true.
 *
 * @param addr Symbol start address to test.
 * @return true if addr matches one of those registers.
 */
bool
StackTrace::isEntry(Addr addr)
{
    static const int entry_iprs[] = {
        AlphaISA::IPR_PALtemp12,
        AlphaISA::IPR_PALtemp7,
        AlphaISA::IPR_PALtemp11,
        AlphaISA::IPR_PALtemp21,
        AlphaISA::IPR_PALtemp9,
        AlphaISA::IPR_PALtemp2
    };
    const int num_entries = sizeof(entry_iprs) / sizeof(entry_iprs[0]);

    for (int idx = 0; idx < num_entries; ++idx) {
        if (addr == xc->regs.ipr[entry_iprs[idx]])
            return true;
    }

    return false;
}
/**
 * Recognise an instruction that adjusts the stack pointer by a constant.
 *
 * @param inst Instruction word to examine.
 * @param disp Set, on a match, to the adjustment expressed as a frame
 *             size: positive when the instruction lowers $sp, negative
 *             when it raises it.  Untouched when there is no match.
 * @return true if inst is one of the recognised forms.
 */
bool
StackTrace::decodeStack(MachInst inst, int &disp)
{
    // Memory format:  lda $sp, -disp($sp)
    //
    //   Opcode<31:26> == 0x08
    //   RA<25:21>     == 30
    //   RB<20:16>     == 30
    //   Disp<15:0>
    const MachInst lda_fixed_bits = 0xffff0000;
    const MachInst lda_sp_sp = 0x23de0000;
    const MachInst lda_disp_bits = 0x0000ffff;

    // Operate format with literal:  addq/subq $sp, disp, $sp
    //
    //   Opcode<31:26> == 0x10
    //   RA<25:21>     == 30
    //   Lit<20:13>
    //   One<12>       == 1
    //   Func<11:5>    == 0x20 (addq) or 0x29 (subq)
    //   RC<4:0>       == 30
    const MachInst op_fixed_bits = 0xffe01fff;
    const MachInst addq_sp_sp = 0x43c0141e;
    const MachInst subq_sp_sp = 0x43c0153e;
    const MachInst op_lit_bits = 0x001fe000;
    const int op_lit_shift = 13;

    if ((inst & lda_fixed_bits) == lda_sp_sp) {
        disp = -sext<16>(inst & lda_disp_bits);
        return true;
    }

    const MachInst op_bits = inst & op_fixed_bits;
    if (op_bits != addq_sp_sp && op_bits != subq_sp_sp)
        return false;

    const int literal = int((inst & op_lit_bits) >> op_lit_shift);
    disp = (op_bits == addq_sp_sp) ? -literal : literal;
    return true;
}
/**
 * Recognise a store of a register to a slot addressed off $sp.
 *
 * @param inst Instruction word to examine.
 * @param reg  Set, on a match, to the RA field (the register stored).
 * @param disp Set, on a match, to the sign-extended 16-bit displacement.
 * @return true if inst matches; reg and disp are untouched otherwise.
 */
bool
StackTrace::decodeSave(MachInst inst, int &reg, int &disp)
{
    // stq $reg, disp($sp)
    //
    //   Opcode<31:26> == 0x2d
    //   RA<25:21>     == register being saved (any)
    //   RB<20:16>     == 30
    //   Disp<15:0>
    const MachInst fixed_bits = 0xfc1f0000;
    const MachInst stq_to_sp = 0xb41e0000;
    const MachInst disp_bits = 0x0000ffff;
    const MachInst ra_bits = 0x03e00000;
    const int ra_shift = 21;

    if ((inst & fixed_bits) != stq_to_sp)
        return false;

    reg = (inst & ra_bits) >> ra_shift;
    disp = sext<16>(inst & disp_bits);
    return true;
}
/*
 * Scan the instructions from the start of a function up to (but not
 * including) callpc, collecting the stack frame size and the saved
 * return address.
 *
 *   sp      stack pointer value used to locate the saved return address
 *   callpc  address at which scanning stops
 *   func    start address of the function
 *   size    out: frame size from the first stack adjustment (0 if none)
 *   ra      out: saved return address (0 if no save was seen)
 *
 * Returns true when scanning reaches callpc or meets a second stack
 * adjustment after a non-zero size was recorded; returns false when the
 * saved return-address slot reads back as zero.
 */
bool
StackTrace::decodePrologue(Addr sp, Addr callpc, Addr func,
                           int &size, Addr &ra)
{
    size = 0;
    ra = 0;

    Addr cur = func;
    while (cur < callpc) {
        MachInst word;
        CopyOut(xc, (uint8_t *)&word, cur, sizeof(MachInst));

        int saved_reg, offset;
        if (decodeStack(word, offset)) {
            // A frame size is already known, so stop at this second
            // adjustment rather than accumulate it.
            if (size)
                return true;

            size += offset;
        } else if (decodeSave(word, saved_reg, offset) &&
                   !ra && saved_reg == ReturnAddressReg) {
            // First save of the return-address register: read the slot.
            CopyOut(xc, (uint8_t *)&ra, sp + offset, sizeof(Addr));
            if (!ra)
                return false;
        }

        cur += sizeof(MachInst);
    }

    return true;
}
#if TRACING_ON
/**
 * Print the recorded stack, last-pushed entry first, through DPRINTFN.
 * The marker values 1, 2 and 3 are printed as "user", "console" and
 * "unknown"; every other entry is looked up in the system-wide symbol
 * table.
 */
void
StackTrace::dump()
{
    // NOTE(review): "name" is not referenced directly below; presumably
    // the DPRINTFN macro picks it up, so keep the identifier as is.
    StringWrap name(xc->cpu->name());
    SymbolTable *symtab = xc->system->allSymtab;

    DPRINTFN("------ Stack ------\n");

    string symbol;
    for (int idx = stack.size() - 1; idx >= 0; --idx) {
        Addr addr = stack[idx];
        switch (addr) {
          case 1:
            symbol = "user";
            break;
          case 2:
            symbol = "console";
            break;
          case 3:
            symbol = "unknown";
            break;
          default:
            symtab->findSymbol(addr, symbol);
            break;
        }

        DPRINTFN("%#x: %s\n", addr, symbol);
    }
}
#endif

99
arch/alpha/stacktrace.hh Normal file
View file

@ -0,0 +1,99 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_ALPHA_STACKTRACE_HH__
#define __ARCH_ALPHA_STACKTRACE_HH__
#include "base/trace.hh"
#include "cpu/static_inst.hh"
class ExecContext;
class StackTrace;
class SymbolTable;
/**
 * Helper for reading per-task information (task pointer, pid, command
 * name) out of simulated kernel memory, given a kernel stack pointer.
 * The sizes and offsets below are read once, in the constructor, from
 * variables found through the kernel symbol table.
 */
class ProcessInfo
{
private:
// Execution context used for symbol lookups and memory reads.
ExecContext *xc;
// Value of the kernel variable "thread_info_size".
int thread_info_size;
// Value of the kernel variable "task_struct_size".
int task_struct_size;
// Value of "thread_info_task": offset added to the stack base in task().
int task_off;
// Value of "task_struct_pid": offset added to the task pointer in pid().
int pid_off;
// Value of "task_struct_comm": offset added to the task pointer in name().
int name_off;
public:
// Panics if any of the required kernel symbols is missing.
ProcessInfo(ExecContext *_xc);
// Task pointer for the given kernel stack pointer, or 0 if there is none.
Addr task(Addr ksp) const;
// Process id for the given kernel stack pointer, or -1 if there is no task.
int pid(Addr ksp) const;
// Command name; "console" if there is no task, "startup" if it is empty.
std::string name(Addr ksp) const;
};
/**
 * Snapshot of the call stack of an execution context, stored as a list
 * of function start addresses.  The values 1, 2 and 3 are markers that
 * dump() prints as "user", "console" and "unknown".
 */
class StackTrace
{
private:
// Execution context the trace was taken from.
ExecContext *xc;
// Recorded entries; the constructor pushes the innermost function first.
std::vector<Addr> stack;
private:
// True if addr matches one of the PALtemp register values checked in isEntry().
bool isEntry(Addr addr);
// Scan [func, callpc) for the frame size and the saved return address.
bool decodePrologue(Addr sp, Addr callpc, Addr func, int &size, Addr &ra);
// Match a store of a register to a $sp-relative slot.
bool decodeSave(MachInst inst, int &reg, int &disp);
// Match a constant adjustment of $sp.
bool decodeStack(MachInst inst, int &disp);
public:
// is_call: record the function containing npc before unwinding from pc.
StackTrace(ExecContext *xc, bool is_call);
~StackTrace();
public:
// Read-only access to the recorded entries.
const std::vector<Addr> &getstack() const { return stack; }
// Returns a heap-allocated trace for call/return instructions, else NULL;
// the caller is responsible for deleting it.
static StackTrace *create(ExecContext *xc, StaticInstPtr<TheISA> inst);
#if TRACING_ON
private:
void dump();
public:
// Dump the trace only when the Stack trace flag is enabled.
void dprintf() { if (DTRACE(Stack)) dump(); }
#else
public:
// Tracing compiled out: nothing to print.
void dprintf() {}
#endif
};
/**
 * Build a stack trace for an instruction that changes the call stack.
 *
 * @param xc   Execution context to trace.
 * @param inst Instruction that was just executed.
 * @return A newly allocated StackTrace when inst is a call or a return
 *         (built with is_call false for returns), otherwise NULL.  The
 *         caller owns the result.
 */
inline StackTrace *
StackTrace::create(ExecContext *xc, StaticInstPtr<TheISA> inst)
{
    const bool is_return = inst->isReturn();
    if (is_return || inst->isCall())
        return new StackTrace(xc, !is_return);

    return NULL;
}
#endif // __ARCH_ALPHA_STACKTRACE_HH__

View file

@ -140,7 +140,8 @@ baseFlags = [
'FullCPU',
'CommitRate',
'OoOCPU',
'HWPrefetch'
'HWPrefetch',
'Stack',
]
#

View file

@ -142,8 +142,19 @@ BaseCPU::BaseCPU(Params *p)
e->schedule(p->functionTraceStart);
}
}
#if FULL_SYSTEM
profileEvent = NULL;
if (params->profile)
profileEvent = new ProfileEvent(this, params->profile);
#endif
}
/**
 * Default-initialise the CPU parameters.
 *
 * In full-system builds, profile is a Tick that is passed to ProfileEvent
 * as the sampling interval; a value of 0 leaves function profiling
 * disabled.
 */
BaseCPU::Params::Params()
{
#if FULL_SYSTEM
    // profile is a Tick, not a flag: use a numeric zero rather than false.
    profile = 0;
#endif
}
void
BaseCPU::enableFunctionTrace()
@ -162,6 +173,16 @@ BaseCPU::init()
registerExecContexts();
}
/**
 * Startup hook.  In full-system builds, schedule the first profile
 * sample at the current tick, provided a profile event exists and
 * registration of this CPU was not deferred.
 */
void
BaseCPU::startup()
{
#if FULL_SYSTEM
    if (profileEvent == NULL || params->deferRegistration)
        return;

    profileEvent->schedule(curTick);
#endif
}
void
BaseCPU::regStats()
{
@ -231,11 +252,32 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
for (int i = 0; i < NumInterruptLevels; ++i)
interrupts[i] = oldCPU->interrupts[i];
intstatus = oldCPU->intstatus;
for (int i = 0; i < execContexts.size(); ++i)
execContexts[i]->profile->clear();
if (profileEvent)
profileEvent->schedule(curTick);
#endif
}
#if FULL_SYSTEM
// Create a profile-sampling event on the main event queue for the given
// CPU; process() reschedules it every _interval ticks.
// NOTE(review): the BaseCPU constructor passes params->profile, declared
// as a Tick, for _interval, which is an int here - confirm that the
// narrowing is acceptable.
BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval)
: Event(&mainEventQueue), cpu(_cpu), interval(_interval)
{ }
void
BaseCPU::ProfileEvent::process()
{
for (int i = 0, size = cpu->execContexts.size(); i < size; ++i) {
ExecContext *xc = cpu->execContexts[i];
xc->profile->sample(xc->profileNode, xc->profilePC);
}
schedule(curTick + interval);
}
void
BaseCPU::post_interrupt(int int_num, int index)
{

View file

@ -33,6 +33,7 @@
#include "base/statistics.hh"
#include "config/full_system.hh"
#include "cpu/profile.hh"
#include "cpu/sampler/sampler.hh"
#include "sim/eventq.hh"
#include "sim/sim_object.hh"
@ -76,6 +77,18 @@ class BaseCPU : public SimObject
bool check_interrupts() const { return intstatus != 0; }
uint64_t intr_status() const { return intstatus; }
// Recurring event that samples the function profile of every execution
// context belonging to a CPU.
class ProfileEvent : public Event
{
private:
// CPU whose execution contexts are sampled.
BaseCPU *cpu;
// Distance, in ticks, between one sample and the next.
int interval;
public:
ProfileEvent(BaseCPU *cpu, int interval);
// Sample every execution context, then reschedule interval ticks ahead.
void process();
};
ProfileEvent *profileEvent;
#endif
protected:
@ -113,7 +126,10 @@ class BaseCPU : public SimObject
#if FULL_SYSTEM
System *system;
int cpu_id;
Tick profile;
#endif
Params();
};
const Params *params;
@ -122,6 +138,7 @@ class BaseCPU : public SimObject
virtual ~BaseCPU();
virtual void init();
virtual void startup();
virtual void regStats();
void registerExecContexts();

View file

@ -32,10 +32,15 @@
#include "cpu/exec_context.hh"
#if FULL_SYSTEM
#include "base/callback.hh"
#include "base/cprintf.hh"
#include "base/output.hh"
#include "cpu/profile.hh"
#include "kern/kernel_stats.hh"
#include "sim/serialize.hh"
#include "sim/sim_exit.hh"
#include "sim/system.hh"
#include "targetarch/stacktrace.hh"
#else
#include "sim/process.hh"
#endif
@ -51,10 +56,24 @@ ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, System *_sys,
cpu_id(-1), mem(_mem), itb(_itb), dtb(_dtb), system(_sys),
memctrl(_sys->memctrl), physmem(_sys->physmem),
kernelBinning(system->kernelBinning), bin(kernelBinning->bin),
fnbin(kernelBinning->fnbin), func_exe_inst(0), storeCondFailures(0)
fnbin(kernelBinning->fnbin), profile(NULL),
func_exe_inst(0), storeCondFailures(0)
{
kernelStats = new Kernel::Statistics(this);
memset(&regs, 0, sizeof(RegFile));
if (cpu->params->profile) {
profile = new FunctionProfile(system->allSymtab);
Callback *cb =
new MakeCallback<ExecContext, &ExecContext::dumpFuncProfile>(this);
registerExitCallback(cb);
}
// let's fill with a dummy node for now so we don't get a segfault
// on the first cycle when there's no node available.
static ProfileNode dummyNode;
profileNode = &dummyNode;
profilePC = 3;
}
#else
ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num,
@ -83,6 +102,14 @@ ExecContext::~ExecContext()
#endif
}
#if FULL_SYSTEM
/**
 * Write this context's function profile to the output file
 * "profile.<cpu name>.dat", created through simout.
 */
void
ExecContext::dumpFuncProfile()
{
    const std::string filename = csprintf("profile.%s.dat", cpu->name());
    std::ostream *out = simout.create(filename);
    profile->dump(this, *out);
}
#endif
void
ExecContext::takeOverFrom(ExecContext *oldContext)
@ -106,15 +133,6 @@ ExecContext::takeOverFrom(ExecContext *oldContext)
oldContext->_status = ExecContext::Unallocated;
}
#if FULL_SYSTEM
void
ExecContext::execute(const StaticInstBase *inst)
{
assert(kernelStats);
system->kernelBinning->execute(this, inst);
}
#endif
void
ExecContext::serialize(ostream &os)
{

View file

@ -46,8 +46,9 @@ class BaseCPU;
#include "sim/system.hh"
#include "targetarch/alpha_memory.hh"
class FunctionProfile;
class ProfileNode;
class MemoryController;
class StaticInstBase;
namespace Kernel { class Binning; class Statistics; }
#else // !FULL_SYSTEM
@ -138,7 +139,11 @@ class ExecContext
Kernel::Statistics *kernelStats;
bool bin;
bool fnbin;
void execute(const StaticInstBase *inst);
FunctionProfile *profile;
ProfileNode *profileNode;
Addr profilePC;
void dumpFuncProfile();
#else
Process *process;

147
cpu/profile.cc Normal file
View file

@ -0,0 +1,147 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include "base/bitfield.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/exec_context.hh"
#include "cpu/profile.hh"
using namespace std;
// A new node starts with a sample count of zero and no children.
ProfileNode::ProfileNode()
: count(0)
{ }
void
ProfileNode::dump(const string &symbol, uint64_t id, const SymbolTable *symtab,
ostream &os) const
{
ccprintf(os, "%#x %s %d ", id, symbol, count);
ChildList::const_iterator i, end = children.end();
for (i = children.begin(); i != end; ++i) {
const ProfileNode &node = i->second;
ccprintf(os, "%#x ", (intptr_t)&node);
}
ccprintf(os, "\n");
for (i = children.begin(); i != end; ++i) {
Addr addr = i->first;
string symbol;
if (addr == 1)
symbol = "user";
else if (addr == 2)
symbol = "console";
else if (addr == 3)
symbol = "unknown";
else if (!symtab->findSymbol(addr, symbol))
panic("could not find symbol for address %#x\n", addr);
const ProfileNode &node = i->second;
node.dump(symbol, (intptr_t)&node, symtab, os);
}
}
void
ProfileNode::clear()
{
count = 0;
ChildList::iterator i, end = children.end();
for (i = children.begin(); i != end; ++i) {
ProfileNode &node = i->second;
node.clear();
}
}
// Remember the symbol table used to resolve addresses in sample() and
// dump().  The pointer is only stored here.
FunctionProfile::FunctionProfile(const SymbolTable *_symtab)
: symtab(_symtab)
{
}
// Nothing to release explicitly; the body is empty.
FunctionProfile::~FunctionProfile()
{
}
/**
 * Find the call-tree node that corresponds to a stack trace, creating
 * any nodes along the path that do not exist yet.
 *
 * The trace is walked from its last entry to its first, descending one
 * level from the top node per entry.
 *
 * @param trace Stack trace to place in the tree.
 * @return The node reached after the final step (the top node for an
 *         empty trace).
 */
ProfileNode *
FunctionProfile::consume(const StackTrace *trace)
{
    const vector<Addr> &frames = trace->getstack();

    ProfileNode *node = &top;
    vector<Addr>::const_reverse_iterator it;
    for (it = frames.rbegin(); it != frames.rend(); ++it)
        node = &node->children[*it];

    return node;
}
/**
 * Discard all collected samples: empty the per-PC histogram and zero
 * every count in the call tree.
 */
void
FunctionProfile::clear()
{
    pc_count.clear();
    top.clear();
}
/**
 * Write the collected profile to os in two sections.
 *
 * ">>>PC data" holds one "<name> <count>" line per histogram entry: the
 * key 1 is printed as "user", keys with a non-empty symbol are printed by
 * name, and all others as a hexadecimal address.  ">>>function data"
 * holds the call tree, starting at a root named "top" with id 0.
 *
 * @param xc Execution context (not used by this function).
 * @param os Destination stream.
 */
void
FunctionProfile::dump(ExecContext *xc, ostream &os) const
{
    ccprintf(os, ">>>PC data\n");

    map<Addr, Counter>::const_iterator it;
    for (it = pc_count.begin(); it != pc_count.end(); ++it) {
        Addr pc = it->first;
        Counter hits = it->second;

        if (pc == 1) {
            ccprintf(os, "user %d\n", hits);
            continue;
        }

        std::string symbol;
        bool named = symtab->findSymbol(pc, symbol) && !symbol.empty();
        if (named)
            ccprintf(os, "%s %d\n", symbol, hits);
        else
            ccprintf(os, "%#x %d\n", pc, hits);
    }

    ccprintf(os, ">>>function data\n");
    top.dump("top", 0, symtab, os);
}
/**
 * Record one profile sample.
 *
 * @param node Call-tree node whose count is incremented.
 * @param pc   Sampled program counter.  The histogram entry that is
 *             incremented is keyed by the nearest symbol address when
 *             one is found, otherwise by pc itself.
 */
void
FunctionProfile::sample(ProfileNode *node, Addr pc)
{
    ++node->count;

    // Fall back to the raw PC when no symbol covers it, so that the
    // sample is still counted rather than silently dropped from the
    // histogram.
    Addr key = pc;
    Addr symaddr;
    if (symtab->findNearestAddr(pc, symaddr))
        key = symaddr;

    ++pc_count[key];
}

74
cpu/profile.hh Normal file
View file

@ -0,0 +1,74 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_PROFILE_HH__
#define __CPU_PROFILE_HH__
#include <map>
#include "cpu/static_inst.hh"
#include "sim/host.hh"
#include "targetarch/stacktrace.hh"
/**
 * One node of the function-profile call tree.  Children are keyed by the
 * address values taken from stack traces.
 */
class ProfileNode
{
private:
// FunctionProfile::consume() walks and extends the children map directly.
friend class FunctionProfile;
typedef std::map<Addr, ProfileNode> ChildList;
// Child nodes, keyed by address.
ChildList children;
public:
// Number of samples attributed to this node.
int count;
public:
ProfileNode();
// Print this node and, recursively, its children to os.
void dump(const std::string &symbol, uint64_t id,
const SymbolTable *symtab, std::ostream &os) const;
// Zero the count of this node and all descendants.
void clear();
};
/**
 * Function profile: a call tree of sample counts plus a flat histogram
 * of sampled program counters.
 */
class FunctionProfile
{
private:
// Symbol table used to resolve addresses in sample() and dump().
const SymbolTable *symtab;
// Root of the call tree; dumped under the name "top".
ProfileNode top;
// Sample counts keyed by symbol address, or by raw PC when no symbol is found.
std::map<Addr, Counter> pc_count;
public:
FunctionProfile(const SymbolTable *symtab);
~FunctionProfile();
// Return the tree node for a stack trace, creating missing nodes.
ProfileNode *consume(const StackTrace *trace);
// Drop the PC histogram and zero every count in the tree.
void clear();
// Write the PC histogram and then the call tree to out.
void dump(ExecContext *xc, std::ostream &out) const;
// Count one sample against node and against pc.
void sample(ProfileNode *node, Addr pc);
};
#endif // __CPU_PROFILE_HH__

View file

@ -50,6 +50,7 @@
#include "cpu/simple/cpu.hh"
#include "cpu/smt.hh"
#include "cpu/static_inst.hh"
#include "kern/kernel_stats.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "sim/builder.hh"
@ -65,6 +66,7 @@
#include "mem/functional/physical.hh"
#include "sim/system.hh"
#include "targetarch/alpha_memory.hh"
#include "targetarch/stacktrace.hh"
#include "targetarch/vtophys.hh"
#else // !FULL_SYSTEM
#include "mem/functional/functional.hh"
@ -753,8 +755,21 @@ SimpleCPU::tick()
fault = curStaticInst->execute(this, traceData);
#if FULL_SYSTEM
if (xc->fnbin)
xc->execute(curStaticInst.get());
if (xc->fnbin) {
assert(xc->kernelStats);
system->kernelBinning->execute(xc, inst);
}
if (xc->profile) {
bool usermode = (xc->regs.ipr[AlphaISA::IPR_DTB_CM] & 0x18) != 0;
xc->profilePC = usermode ? 1 : xc->regs.pc;
StackTrace *trace = StackTrace::create(xc, inst);
if (trace) {
xc->profileNode = xc->profile->consume(trace);
trace->dprintf();
delete trace;
}
}
#endif
if (curStaticInst->isMemRef()) {
@ -806,7 +821,6 @@ SimpleCPU::tick()
tickEvent.schedule(curTick + cycles(1));
}
////////////////////////////////////////////////////////////////////////
//
// SimpleCPU Simulation Object
@ -824,6 +838,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
SimObjectParam<FunctionalMemory *> mem;
SimObjectParam<System *> system;
Param<int> cpu_id;
Param<Tick> profile;
#else
SimObjectParam<Process *> workload;
#endif // FULL_SYSTEM
@ -856,6 +871,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
INIT_PARAM(mem, "memory"),
INIT_PARAM(system, "system object"),
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "processes to run"),
#endif // FULL_SYSTEM
@ -894,6 +910,7 @@ CREATE_SIM_OBJECT(SimpleCPU)
params->mem = mem;
params->system = system;
params->cpu_id = cpu_id;
params->profile = profile;
#else
params->process = workload;
#endif

View file

@ -34,11 +34,12 @@
#include <string>
#include <vector>
#include "cpu/static_inst.hh"
class BaseCPU;
class ExecContext;
class FnEvent;
// What does kernel stats expect is included?
class StaticInstBase;
class System;
enum Fault;
@ -105,7 +106,7 @@ class Binning
cpu_mode themode;
void palSwapContext(ExecContext *xc);
void execute(ExecContext *xc, const StaticInstBase *inst);
void execute(ExecContext *xc, StaticInstPtr<TheISA> inst);
void call(ExecContext *xc, Stats::MainBin *myBin);
void changeMode(cpu_mode mode);

View file

@ -54,6 +54,7 @@ System::System(Params *p)
kernelSymtab = new SymbolTable;
consoleSymtab = new SymbolTable;
palSymtab = new SymbolTable;
allSymtab = new SymbolTable;
debugSymbolTable = new SymbolTable;
/**
@ -101,6 +102,21 @@ System::System(Params *p)
if (!pal->loadLocalSymbols(palSymtab))
panic("could not load pal symbols\n");
if (!kernel->loadGlobalSymbols(allSymtab))
panic("could not load kernel symbols\n");
if (!kernel->loadLocalSymbols(allSymtab))
panic("could not load kernel local symbols\n");
if (!console->loadGlobalSymbols(allSymtab))
panic("could not load console symbols\n");
if (!pal->loadGlobalSymbols(allSymtab))
panic("could not load pal symbols\n");
if (!pal->loadLocalSymbols(allSymtab))
panic("could not load pal symbols\n");
if (!kernel->loadGlobalSymbols(debugSymbolTable))
panic("could not load kernel symbols\n");

View file

@ -77,6 +77,9 @@ class System : public SimObject
/** pal symbol table */
SymbolTable *palSymtab;
/** all symbols table */
SymbolTable *allSymtab;
/** Object pointer for the kernel code */
ObjectFile *kernel;

File diff suppressed because it is too large Load diff

View file

@ -27,103 +27,227 @@
from orderdict import orderdict
import output
class ProfileData(object):
def __init__(self):
self.data = {}
self.total = {}
self.runs = orderdict()
self.runlist = []
class RunData(dict):
def __init__(self, filename=None):
self.filename = filename
def addvalue(self, run, cpu, symbol, value):
value = float(value)
self.data[run, cpu, symbol] = self.getvalue(run, cpu, symbol) + value
self.total[run, cpu] = self.gettotal(run, cpu) + value
if run not in self.runs:
self.runs[run] = orderdict()
def __getattr__(self, attr):
if attr == 'total':
total = 0.0
for value in self.itervalues():
total += value
return total
if attr == 'maxsymlen':
return max([ len(sym) for sym in self.iterkeys() ])
if cpu not in self.runs[run]:
self.runs[run][cpu] = {}
def display(self, output=None, limit=None, maxsymlen=None):
if not output:
import sys
output = sys.stdout
elif isinstance(output, str):
output = file(output, 'w')
if symbol not in self.runs[run][cpu]:
self.runs[run][cpu][symbol] = 0
total = float(self.total)
self.runs[run][cpu][symbol] += value
# swap (string,count) order so we can sort on count
symbols = [ (count,name) for name,count in self.iteritems() ]
symbols.sort(reverse=True)
if limit is not None:
symbols = symbols[:limit]
def getvalue(self, run, cpu, symbol):
return self.data.get((run, cpu, symbol), 0)
if not maxsymlen:
maxsymlen = self.maxsymlen
def gettotal(self, run, cpu):
return self.total.get((run, cpu), 0)
symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
for number,name in symbols:
print >>output, symbolf % (name, 100.0 * (float(number) / total))
class Profile(object):
default_order = ['ste', 'hte', 'htd', 'ocm', 'occ', 'ocp']
# This list controls the order of values in stacked bar data output
default_categories = [ 'interrupt',
'driver',
'stack',
'bufmgt',
'copy',
'user',
'other',
'idle']
def __init__(self, run_order=[], categories=[], stacknames=[]):
if not run_order:
run_order = Profile.default_order
if not categories:
categories = Profile.default_categories
class PCData(RunData):
def __init__(self, filename=None, categorize=None, showidle=True):
super(PCData, self).__init__(self, filename)
if filename is None:
return
self.run_order = run_order
self.categories = categories
self.rcategories = []
self.rcategories.extend(categories)
self.rcategories.reverse()
self.stacknames = stacknames
self.prof = ProfileData()
self.categorize = True
self.showidle = True
self.maxsymlen = 0
def category(self, symbol):
from categories import categories, categories_re
if categories.has_key(symbol):
return categories[symbol]
for regexp, cat in categories_re:
if regexp.match(symbol):
return cat
return 'other'
# Parse input file and put the results in the given run and cpu
def parsefile(self, run, cpu, filename):
fd = file(filename)
for line in fd:
if line.strip() == '>>>PC data':
break
for line in fd:
if line.startswith('>>>'):
break
(symbol, count) = line.split()
if symbol == "0x0":
continue
count = int(count)
if self.categorize:
symbol = self.category(symbol)
if symbol == 'idle' and not self.showidle:
if categorize is not None:
category = categorize(symbol)
if category is None:
category = 'other'
elif category == 'idle' and not showidle:
continue
if symbol not in self.categories:
symbol = 'other'
self.maxsymlen = max(self.maxsymlen, len(symbol))
self.prof.addvalue(run, cpu, symbol, count)
self[category] = count
fd.close()
class FuncNode(object):
    """One node of a function-profile call tree.

    FuncNode(filename) parses the '>>>function data' section of a profile
    file and returns the root of the tree (the node whose id is 0).
    Every record in that section has the form

        <hex id> <symbol> <count> [<hex child id> ...]

    FuncNode() with no argument returns an empty node; the parser fills
    in its symbol, count and children attributes.
    """

    def __new__(cls, filename = None):
        if filename is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        fd = open(filename, 'r')
        try:
            fditer = iter(fd)

            # Skip everything up to the start of the function section.
            for line in fditer:
                if line.strip() == '>>>function data':
                    break

            # One record per line until the next section marker or EOF.
            for line in fditer:
                if line.startswith('>>>'):
                    break

                data = line.split()
                node_id = int(data[0], 16)
                node = FuncNode()
                node.symbol = data[1]
                node.count = int(data[2])
                node.children = [ int(child, 16) for child in data[3:] ]
                nodes[node_id] = node
        finally:
            # Always release the file, including when a record is malformed.
            fd.close()

        if not nodes:
            raise KeyError('no function data found in %s' % filename)

        # Replace the child ids with the nodes themselves and link every
        # child back to its parent.
        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        return nodes[0]

    def __init__(self, filename=None):
        # All construction work happens in __new__.
        pass

    def total(self):
        """Return this node's count plus the counts of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree totals into dict.

        When categorize maps this node's symbol to a category, the whole
        subtree total is added under that category and 0 is returned.
        Otherwise the total is returned to the caller and, unless an
        ancestor was already categorized, also added under the symbol.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print this node and, recursively, its children to stdout."""
        kids = [ child.symbol for child in self.children]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        # Add this subtree to the pydot graph, skipping nodes whose share
        # of total is below threshold and nodes that categorize places in
        # a category other than 'other'.
        from pydot import Dot, Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        # Remove the temporary dot_node attribute from the whole subtree.
        for child in self.children:
            child._cleandot()
            self.dot_node = None
            del self.__dict__['dot_node']

    def dot(self, dot, threshold=0.1, categorize=None):
        """Add this tree to the pydot graph dot, then drop the temporary
        per-node state used while building it."""
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()
class FuncData(RunData):
    """Function-profile data of one run, backed by a FuncNode tree.

    The tree read from filename is kept in self.tree, its aggregated
    counts are stored in this object itself (keyed by symbol or
    category name) and the grand total is kept in self.total.
    """

    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        root = FuncNode(filename)
        self.tree = root
        # aggregate() fills this object through its mapping interface
        root.aggregate(self, categorize, incategory=False)
        self.total = root.total()

    def displayx(self, output=None, maxcount=None):
        """Write entries as percentages of self.total, largest first.

        output defaults to sys.stdout.  When maxcount is not None, at
        most that many entries are written.
        """
        if output is None:
            import sys
            output = sys.stdout

        # Sort on (count, name) so ties are ordered by name as well.
        ranked = sorted([ (count, name)
                          for name, count in self.iteritems() ],
                        reverse=True)

        remaining = maxcount
        for count, name in ranked:
            if remaining is not None:
                if remaining == 0:
                    return
                remaining -= 1

            share = '%3.2f%%' % (count * 100.0 / self.total)
            print >>output, '%-30s %8s' % (name, share)
class Profile(object):
    """Collection of per-run, per-cpu profile data sets.

    datatype is a callable invoked as datatype(filename, categorize)
    for every profile file found by inputdir(); whatever it returns is
    stored in self.data[run][cpu].  self.cpu selects which cpu number
    the job-based methods (get, write_dot, write_txt, display) look at.
    """

    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        # private copies so the class-level list is never modified
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    # Read in files
    def inputdir(self, directory):
        """Load every profile file found below directory.

        Files are recognised by the name pattern profile.<cpu>.dat.
        The run name is the path of the containing directory relative
        to directory; the cpu name is the <cpu> part of the file name.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        label_ex = re.compile(r'profile\.(.*).dat')
        for root,dirs,files in os.walk(directory):
            for name in files:
                match = label_ex.match(name)
                if not match:
                    # not a profile file
                    continue

                filename = joinpath(root, name)
                prefix = os.path.commonprefix([root, directory])
                dirname = root[len(prefix)+1:]
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store data for (run, cpu).

        Raises AttributeError if data is already stored for that pair.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data stored for (run, cpu), or None if there is none."""
        try:
            return self.data[run][cpu]
        except KeyError:
            return None

    def alldata(self):
        """Generate (run, cpu, data) for every stored data set."""
        for run,cpus in self.data.items():
            for cpu,data in cpus.items():
                yield run,cpu,data

    def get(self, job, stat):
        """Return one value per entry of self.categories for job.

        The data set is looked up under job.name and the cpu name
        '<job.system>.full<self.cpu>'.  Categories missing from the
        data set, or a missing/empty data set, yield 0.0.  stat is
        accepted but not used here.

        Raises AttributeError if job.system is None.
        """
        if job.system is None:
            raise AttributeError('The job must have a system set')

        data = self.getdata(job.name, '%s.full%d' % (job.system, self.cpu))
        if not data:
            return [ 0.0 for c in self.categories ]

        values = []
        for category in self.categories:
            values.append(data.get(category, 0.0))
        return values

    def dump(self):
        """Print every stored data set via its own dump() method."""
        for run,cpu,data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a dot file next to the profile file of each job.

        jobs defaults to all jobs of jobfile.  Jobs without data for
        the selected cpu are skipped.  The output name is the data
        set's filename with its last three characters replaced by
        'dot'.
        """
        import pydot

        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        for job in jobs:
            cpu = '%s.full%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None):
        """Write a text report next to the profile file of each job.

        jobs defaults to all jobs of jobfile.  Jobs without data for
        the selected cpu are skipped.  The output name is the data
        set's filename with its last three characters replaced by
        'txt'.
        """
        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        for job in jobs:
            cpu = '%s.full%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            output = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(output)
            finally:
                # make sure the report is flushed and the handle freed
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print the data of every job that has data for the selected cpu.

        jobs defaults to all jobs of jobfile.  The largest maxsymlen
        of all displayed data sets is passed to each display() call.
        """
        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        maxsymlen = 0

        thejobs = []
        for job in jobs:
            cpu = '%s.full%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if symbols:
                thejobs.append(job)
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job in thejobs:
            cpu = '%s.full%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')
from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile whose data sets are created with PCData."""

    def __init__(self, categorize=pc_categorize):
        super(PCProfile, self).__init__(datatype=PCData,
                                        categorize=categorize)
class FuncProfile(Profile):
    """Profile whose data sets are created with FuncData."""

    def __init__(self, categorize=func_categorize):
        super(FuncProfile, self).__init__(datatype=FuncData,
                                          categorize=categorize)
def usage(exitcode = None):
    """Print the command line help text.

    If exitcode is not None the process is terminated with that exit
    status (sys.exit raises SystemExit) after the text is printed.
    The option list mirrors the getopt string used by the script's
    main section.
    """
    # Imported here so the function also works when this file is
    # imported as a module and the main section never ran.
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <format>] [-f <file>] [-g <dir>]
          [-j <jobfile>] [-n <num>] [-T <threshold>]

    -C <cpus>    comma separated list of cpu numbers (default 0)
    -c           groups symbols into categories
    -D <format>  format for dot output written with -f (default raw)
    -d           generate dot output
    -f <file>    read a single function profile file, ignore the jobfile
    -g <d>       draw graphs and send output to <d>
    -i           do not show idle time
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use pc sample profiles, not function profiles
    -T <t>       minimum percentage for a node in dot output (default 0.01)
    -t           write text output files
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)
# Command line driver: parses the options, then either processes one
# profile file given with -f or every profile below the jobfile's
# root directory.
if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser, splitext
    from output import StatOutput
    from jobfile import JobFile

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotformat = 'raw'
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o,a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotformat = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    # No positional arguments are accepted.
    if args:
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        # Single file mode: work on one function profile directly.
        data = FuncData(inputfile)

        if dodot:
            import pydot
            dot = pydot.Dot()
            # The graph is drawn by the FuncNode tree, not by the
            # FuncData object wrapping it.
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            # Output goes next to the input file, with a .dot extension.
            dotfile = splitext(inputfile)[0] + '.dot'
            dot.write(dotfile, format=dotformat)
        else:
            data.display(limit=numsyms)

    else:
        # Jobfile mode: load every profile below the jobfile's rootdir.
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(name, jobfile, info=profile)
                output.graph(graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if not categorize:
            for cpu in cpus:
                profile.cpu = cpu
                profile.categorize = None

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        # Plain listing is the fallback when no other output was asked for.
        if not graph and not textout and not dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)