ARM: dump stats and process info on context switches
This patch enables dumping statistics and Linux process information on context switch boundaries (__switch_to() calls) that are used for Streamline integration (a graphical statistics viewer from ARM).
This commit is contained in:
parent
322daba74c
commit
81406018b0
9 changed files with 222 additions and 5 deletions
|
@ -71,3 +71,4 @@ class LinuxArmSystem(ArmSystem):
|
|||
"File that contains the Device Tree Blob. Don't use DTB if empty.")
|
||||
early_kernel_symbols = Param.Bool(False,
|
||||
"enable early kernel symbol tables before MMU")
|
||||
enable_context_switch_stats_dump = Param.Bool(False, "enable stats/task info dumping at context switch boundaries")
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 ARM Limited
|
||||
* Copyright (c) 2010-2012 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
@ -44,19 +44,24 @@
|
|||
#include "arch/arm/linux/system.hh"
|
||||
#include "arch/arm/isa_traits.hh"
|
||||
#include "arch/arm/utility.hh"
|
||||
#include "arch/generic/linux/threadinfo.hh"
|
||||
#include "base/loader/object_file.hh"
|
||||
#include "base/loader/symtab.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/pc_event.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "debug/Loader.hh"
|
||||
#include "kern/linux/events.hh"
|
||||
#include "mem/fs_translating_port_proxy.hh"
|
||||
#include "mem/physical.hh"
|
||||
#include "sim/stat_control.hh"
|
||||
|
||||
using namespace ArmISA;
|
||||
using namespace Linux;
|
||||
|
||||
LinuxArmSystem::LinuxArmSystem(Params *p)
|
||||
: ArmSystem(p)
|
||||
: ArmSystem(p),
|
||||
enableContextSwitchStatsDump(p->enable_context_switch_stats_dump)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
kernelPanicEvent = addKernelFuncEvent<BreakPCEvent>("panic");
|
||||
|
@ -206,6 +211,9 @@ LinuxArmSystem::~LinuxArmSystem()
|
|||
delete uDelaySkipEvent;
|
||||
if (constUDelaySkipEvent)
|
||||
delete constUDelaySkipEvent;
|
||||
|
||||
if (dumpStatsPCEvent)
|
||||
delete dumpStatsPCEvent;
|
||||
}
|
||||
|
||||
LinuxArmSystem *
|
||||
|
@ -213,3 +221,95 @@ LinuxArmSystemParams::create()
|
|||
{
|
||||
return new LinuxArmSystem(this);
|
||||
}
|
||||
|
||||
void
|
||||
LinuxArmSystem::startup()
|
||||
{
|
||||
if (enableContextSwitchStatsDump) {
|
||||
dumpStatsPCEvent = addKernelFuncEvent<DumpStatsPCEvent>("__switch_to");
|
||||
if (!dumpStatsPCEvent)
|
||||
panic("dumpStatsPCEvent not created!");
|
||||
|
||||
std::string task_filename = "tasks.txt";
|
||||
taskFile = simout.create(name() + "." + task_filename);
|
||||
|
||||
for (int i = 0; i < _numContexts; i++) {
|
||||
ThreadContext *tc = threadContexts[i];
|
||||
uint32_t pid = tc->getCpuPtr()->getPid();
|
||||
if (pid != Request::invldPid) {
|
||||
mapPid(tc, pid);
|
||||
tc->getCpuPtr()->taskId(taskMap[pid]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LinuxArmSystem::mapPid(ThreadContext *tc, uint32_t pid)
|
||||
{
|
||||
// Create a new unique identifier for this pid
|
||||
std::map<uint32_t, uint32_t>::iterator itr = taskMap.find(pid);
|
||||
if (itr == taskMap.end()) {
|
||||
uint32_t map_size = taskMap.size();
|
||||
if (map_size > ContextSwitchTaskId::MaxNormalTaskId + 1) {
|
||||
warn_once("Error out of identifiers for cache occupancy stats");
|
||||
taskMap[pid] = ContextSwitchTaskId::Unknown;
|
||||
} else {
|
||||
taskMap[pid] = map_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** This function is called whenever the the kernel function
|
||||
* "__switch_to" is called to change running tasks.
|
||||
*
|
||||
* r0 = task_struct of the previously running process
|
||||
* r1 = task_info of the previously running process
|
||||
* r2 = task_info of the next process to run
|
||||
*/
|
||||
void
|
||||
DumpStatsPCEvent::process(ThreadContext *tc)
|
||||
{
|
||||
Linux::ThreadInfo ti(tc);
|
||||
Addr task_descriptor = tc->readIntReg(2);
|
||||
uint32_t pid = ti.curTaskPID(task_descriptor);
|
||||
uint32_t tgid = ti.curTaskTGID(task_descriptor);
|
||||
std::string next_task_str = ti.curTaskName(task_descriptor);
|
||||
|
||||
// Streamline treats pid == -1 as the kernel process.
|
||||
// Also pid == 0 implies idle process (except during Linux boot)
|
||||
int32_t mm = ti.curTaskMm(task_descriptor);
|
||||
bool is_kernel = (mm == 0);
|
||||
if (is_kernel && (pid != 0)) {
|
||||
pid = -1;
|
||||
tgid = -1;
|
||||
next_task_str = "kernel";
|
||||
}
|
||||
|
||||
LinuxArmSystem* sys = dynamic_cast<LinuxArmSystem *>(tc->getSystemPtr());
|
||||
if (!sys) {
|
||||
panic("System is not LinuxArmSystem while getting Linux process info!");
|
||||
}
|
||||
std::map<uint32_t, uint32_t>& taskMap = sys->taskMap;
|
||||
|
||||
// Create a new unique identifier for this pid
|
||||
sys->mapPid(tc, pid);
|
||||
|
||||
// Set cpu task id, output process info, and dump stats
|
||||
tc->getCpuPtr()->taskId(taskMap[pid]);
|
||||
tc->getCpuPtr()->setPid(pid);
|
||||
|
||||
std::ostream* taskFile = sys->taskFile;
|
||||
|
||||
// Task file is read by cache occupancy plotting script or
|
||||
// Streamline conversion script.
|
||||
ccprintf(*taskFile,
|
||||
"tick=%lld %d cpu_id=%d next_pid=%d next_tgid=%d next_task=%s\n",
|
||||
curTick(), taskMap[pid], tc->cpuId(), (int) pid, (int) tgid,
|
||||
next_task_str);
|
||||
taskFile->flush();
|
||||
|
||||
// Dump and reset statistics
|
||||
Stats::schedStatEvent(true, true, curTick(), 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 ARM Limited
|
||||
* Copyright (c) 2010-2012 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
@ -43,15 +43,24 @@
|
|||
#ifndef __ARCH_ARM_LINUX_SYSTEM_HH__
|
||||
#define __ARCH_ARM_LINUX_SYSTEM_HH__
|
||||
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/arm/system.hh"
|
||||
#include "base/output.hh"
|
||||
#include "kern/linux/events.hh"
|
||||
#include "params/LinuxArmSystem.hh"
|
||||
#include "sim/core.hh"
|
||||
|
||||
class DumpStatsPCEvent;
|
||||
|
||||
class LinuxArmSystem : public ArmSystem
|
||||
{
|
||||
protected:
|
||||
DumpStatsPCEvent *dumpStatsPCEvent;
|
||||
|
||||
public:
|
||||
/** Boilerplate params code */
|
||||
typedef LinuxArmSystemParams Params;
|
||||
|
@ -61,6 +70,20 @@ class LinuxArmSystem : public ArmSystem
|
|||
return dynamic_cast<const Params *>(_params);
|
||||
}
|
||||
|
||||
/** When enabled, dump stats/task info on context switches for
|
||||
* Streamline and per-thread cache occupancy studies, etc. */
|
||||
bool enableContextSwitchStatsDump;
|
||||
|
||||
/** This map stores a mapping of OS process IDs to internal Task IDs. The
|
||||
* mapping is done because the stats system doesn't tend to like vectors
|
||||
* that are much greater than 1000 items and the entire process space is
|
||||
* 65K. */
|
||||
std::map<uint32_t, uint32_t> taskMap;
|
||||
|
||||
/** This is a file that is placed in the run directory that prints out
|
||||
* mappings between taskIds and OS process IDs */
|
||||
std::ostream* taskFile;
|
||||
|
||||
LinuxArmSystem(Params *p);
|
||||
~LinuxArmSystem();
|
||||
|
||||
|
@ -68,6 +91,12 @@ class LinuxArmSystem : public ArmSystem
|
|||
|
||||
bool adderBootUncacheable(Addr a);
|
||||
|
||||
void startup();
|
||||
|
||||
/** This function creates a new task Id for the given pid.
|
||||
* @param tc thread context that is currently executing */
|
||||
void mapPid(ThreadContext* tc, uint32_t pid);
|
||||
|
||||
private:
|
||||
#ifndef NDEBUG
|
||||
/** Event to halt the simulator if the kernel calls panic() */
|
||||
|
@ -97,5 +126,16 @@ class LinuxArmSystem : public ArmSystem
|
|||
Addr penReleaseAddr;
|
||||
};
|
||||
|
||||
class DumpStatsPCEvent : public PCEvent
|
||||
{
|
||||
public:
|
||||
DumpStatsPCEvent(PCEventQueue *q, const std::string &desc, Addr addr)
|
||||
: PCEvent(q, desc, addr)
|
||||
{}
|
||||
|
||||
virtual void process(ThreadContext* tc);
|
||||
};
|
||||
|
||||
|
||||
#endif // __ARCH_ARM_LINUX_SYSTEM_HH__
|
||||
|
||||
|
|
|
@ -118,6 +118,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
|
|||
: MemObject(p), instCnt(0), _cpuId(p->cpu_id),
|
||||
_instMasterId(p->system->getMasterId(name() + ".inst")),
|
||||
_dataMasterId(p->system->getMasterId(name() + ".data")),
|
||||
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
|
||||
interrupts(p->interrupts), profileEvent(NULL),
|
||||
numThreads(p->numThreads), system(p->system)
|
||||
{
|
||||
|
@ -359,6 +360,8 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
|
|||
{
|
||||
assert(threadContexts.size() == oldCPU->threadContexts.size());
|
||||
assert(_cpuId == oldCPU->cpuId());
|
||||
_pid = oldCPU->getPid();
|
||||
_taskId = oldCPU->taskId();
|
||||
|
||||
ThreadID size = threadContexts.size();
|
||||
for (ThreadID i = 0; i < size; ++i) {
|
||||
|
@ -489,6 +492,13 @@ void
|
|||
BaseCPU::serialize(std::ostream &os)
{
    SERIALIZE_SCALAR(instCnt);

    // Only _pid goes into the checkpoint. _taskId is deliberately left
    // out: task ids are assigned dynamically and are only meaningful for
    // the duration of a specific run. Checkpointing them would also
    // require serializing the entire taskMap in system.
    SERIALIZE_SCALAR(_pid);

    interrupts->serialize(os);
}
|
||||
|
||||
|
@ -496,6 +506,7 @@ void
|
|||
BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
{
    // Restore the scalars in the order serialize() writes them, then let
    // the interrupt controller restore itself from the same section.
    UNSERIALIZE_SCALAR(instCnt);
    UNSERIALIZE_SCALAR(_pid);
    interrupts->unserialize(cp, section);
}
|
||||
|
||||
|
|
|
@ -103,6 +103,17 @@ class BaseCPU : public MemObject
|
|||
/** data side request id that must be placed in all requests */
|
||||
MasterID _dataMasterId;
|
||||
|
||||
/** An internal representation of a task identifier within gem5. This is
|
||||
* used so the CPU can add which taskId (which is an internal representation
|
||||
* of the OS process ID) to each request so components in the memory system
|
||||
* can track which process IDs are ultimately interacting with them
|
||||
*/
|
||||
uint32_t _taskId;
|
||||
|
||||
/** The current OS process ID that is executing on this processor. This is
|
||||
* used to generate a taskId */
|
||||
uint32_t _pid;
|
||||
|
||||
/**
|
||||
* Define a base class for the CPU ports (instruction and data)
|
||||
* that is refined in the subclasses. This class handles the
|
||||
|
@ -174,6 +185,14 @@ class BaseCPU : public MemObject
|
|||
BaseMasterPort &getMasterPort(const std::string &if_name,
|
||||
PortID idx = InvalidPortID);
|
||||
|
||||
/** Get cpu task id */
|
||||
uint32_t taskId() const { return _taskId; }
|
||||
/** Set cpu task id */
|
||||
void taskId(uint32_t id) { _taskId = id; }
|
||||
|
||||
uint32_t getPid() const { return _pid; }
|
||||
void setPid(uint32_t pid) { _pid = pid; }
|
||||
|
||||
inline void workItemBegin() { numWorkItemsStarted++; }
|
||||
inline void workItemEnd() { numWorkItemsCompleted++; }
|
||||
// @todo remove me after debugging with legion done
|
||||
|
|
|
@ -30,7 +30,6 @@
|
|||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
|
|
|
@ -1,4 +1,16 @@
|
|||
/*
|
||||
* Copyright (c) 2012 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2002-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
|
@ -47,6 +59,24 @@
|
|||
#include "base/types.hh"
|
||||
#include "sim/core.hh"
|
||||
|
||||
/**
 * Special TaskIds that are used for per-context-switch stats dumps
 * and Cache Occupancy. Having too many tasks seems to be a problem
 * with vector stats. 1024 seems to be a reasonable number that
 * doesn't cause a problem with stats and is large enough for realistic
 * benchmarks (Linux/Android boot, BBench, etc.)
 */
namespace ContextSwitchTaskId {

enum TaskId
{
    // Maximum number of normal tasks
    MaxNormalTaskId = 1021,

    // For cache lines brought in by prefetcher
    Prefetcher = 1022,

    // Mostly Table Walker
    DMA = 1023,

    Unknown = 1024,

    // Implicitly one past Unknown, i.e. 1025
    NumTaskId
};

} // namespace ContextSwitchTaskId
|
||||
|
||||
class Request;
|
||||
|
||||
typedef Request* RequestPtr;
|
||||
|
@ -117,6 +147,10 @@ class Request
|
|||
static const MasterID invldMasterId = USHRT_MAX;
|
||||
/** @} */
|
||||
|
||||
/** Invalid or unknown Pid. Possible when operating system is not present
|
||||
* or has not assigned a pid yet */
|
||||
static const uint32_t invldPid = UINT_MAX;
|
||||
|
||||
private:
|
||||
typedef uint8_t PrivateFlagsType;
|
||||
typedef ::Flags<PrivateFlagsType> PrivateFlags;
|
||||
|
|
|
@ -57,7 +57,7 @@ class SimObject;
|
|||
* SimObject shouldn't cause the version number to increase, only changes to
|
||||
* existing objects such as serializing/unserializing more state, changing sizes
|
||||
* of serialized arrays, etc. */
|
||||
static const uint64_t gem5CheckpointVersion = 0x0000000000000002;
|
||||
static const uint64_t gem5CheckpointVersion = 0x0000000000000003;
|
||||
|
||||
/**
 * Declaration of the templated paramOut helper. Only the signature is
 * visible here: an output stream, a name string and a const reference to
 * the value of type T.
 */
template <class T>
void paramOut(std::ostream &os, const std::string &name, const T &param);
|
||||
|
|
|
@ -105,9 +105,22 @@ def from_1(cpt):
|
|||
# the system, thus starting at 0
|
||||
raise ValueError("more than one memory detected (" + sec + ")")
|
||||
|
||||
def from_2(cpt):
    """Migration step applied to version-2 checkpoints.

    Every section whose name matches '.*sys.*cpu' and that carries an
    'instCnt' option gets a '_pid' option set to '0'. All other sections
    are left untouched.
    """
    import re

    cpu_section = re.compile('.*sys.*cpu')
    for section in cpt.sections():
        # Search for CPUs
        if not cpu_section.search(section):
            continue
        try:
            # Only the lookup matters: it raises when 'instCnt' is absent.
            cpt.get(section, 'instCnt')
        except ConfigParser.NoOptionError:
            pass
        else:
            cpt.set(section, '_pid', '0')
|
||||
|
||||
|
||||
migrations = []
|
||||
migrations.append(from_0)
|
||||
migrations.append(from_1)
|
||||
migrations.append(from_2)
|
||||
|
||||
verbose_print = False
|
||||
|
||||
|
|
Loading…
Reference in a new issue