cpu: generate SimPoint basic block vector profiles

This patch is based on http://reviews.m5sim.org/r/1474/ originally written by
Mitch Hayenga. Basic block vectors are generated (simpoint.bb.gz in simout
folder) based on start and end addresses of basic blocks.

Some comments to the original patch are addressed and hooks are added to create
and resume from checkpoints based on instruction counts dictated by external
SimPoint analysis tools.

SimPoint creation/resuming options will be implemented as a separate patch.
This commit is contained in:
Dam Sunwoo 2013-04-22 13:20:31 -04:00
parent 121b15a54d
commit 2c1e344313
7 changed files with 182 additions and 1 deletions

View file

@ -50,6 +50,10 @@ def addCommonOptions(parser):
parser.add_option("--caches", action="store_true")
parser.add_option("--l2cache", action="store_true")
parser.add_option("--fastmem", action="store_true")
parser.add_option("--simpoint-profile", action="store_true",
help="Enable basic block profiling for SimPoints")
parser.add_option("--simpoint-interval", type="int", default=10000000,
help="SimPoint interval in num of instructions")
parser.add_option("--clock", action="store", type="string", default='2GHz')
parser.add_option("--num-dirs", type="int", default=1)
parser.add_option("--num-l2caches", type="int", default=1)

View file

@ -166,6 +166,13 @@ if options.fastmem:
if (options.caches or options.l2cache):
fatal("You cannot use fastmem in combination with caches!")
if options.simpoint_profile:
if not options.fastmem:
# Atomic CPU checked with fastmem option already
fatal("SimPoint generation should be done with atomic cpu and fastmem")
if np > 1:
fatal("SimPoint generation not supported with more than one CPUs")
for i in xrange(np):
if options.smt:
system.cpu[i].workload = multiprocesses
@ -177,6 +184,10 @@ for i in xrange(np):
if options.fastmem:
system.cpu[i].fastmem = True
if options.simpoint_profile:
system.cpu[i].simpoint_profile = True
system.cpu[i].simpoint_interval = options.simpoint_interval
if options.checker:
system.cpu[i].addCheckerCpu()

View file

@ -187,6 +187,8 @@ class BaseCPU(MemObject):
"terminate when all threads have reached this inst count")
max_insts_any_thread = Param.Counter(0,
"terminate when any thread reaches this inst count")
simpoint_start_insts = VectorParam.Counter([],
"starting instruction counts of simpoints")
max_loads_all_threads = Param.Counter(0,
"terminate when all threads have reached this load count")
max_loads_any_thread = Param.Counter(0,

View file

@ -153,6 +153,18 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
}
}
// Set up instruction-count-based termination events for SimPoints
// Typically, there is more than one action point.
// Simulation.py is responsible for taking the necessary actions upon
// exiting the simulation loop.
if (!p->simpoint_start_insts.empty()) {
const char *cause = "simpoint starting point found";
for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i) {
Event *event = new SimLoopExitEvent(cause, 0);
comInstEventQueue[0]->schedule(event, p->simpoint_start_insts[i]);
}
}
if (p->max_insts_all_threads != 0) {
const char *cause = "all threads reached the max instruction count";

View file

@ -61,3 +61,6 @@ class AtomicSimpleCPU(BaseSimpleCPU):
simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles")
simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles")
fastmem = Param.Bool(False, "Access memory directly")
simpoint_profile = Param.Bool(False, "Generate SimPoint BBVs")
simpoint_interval = Param.UInt64(100000000, "SimPoint Interval Size (insts)")
simpoint_profile_file = Param.String("simpoint.bb.gz", "SimPoint BBV file")

View file

@ -44,6 +44,7 @@
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
@ -109,9 +110,20 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
drain_manager(NULL),
icachePort(name() + ".icache_port", this),
dcachePort(name() + ".dcache_port", this),
fastmem(p->fastmem)
fastmem(p->fastmem),
simpoint(p->simpoint_profile),
intervalSize(p->simpoint_interval),
intervalCount(0),
intervalDrift(0),
simpointStream(NULL),
currentBBV(0, 0),
currentBBVInstCount(0)
{
_status = Idle;
if (simpoint) {
simpointStream = simout.create(p->simpoint_profile_file, false);
}
}
@ -120,6 +132,9 @@ AtomicSimpleCPU::~AtomicSimpleCPU()
if (tickEvent.scheduled()) {
deschedule(tickEvent);
}
if (simpointStream) {
simout.close(simpointStream);
}
}
unsigned int
@ -534,6 +549,13 @@ AtomicSimpleCPU::tick()
curStaticInst->isFirstMicroop()))
instCnt++;
// profile for SimPoints if enabled and macro inst is finished
if (simpoint && curStaticInst && (fault == NoFault) &&
(!curStaticInst->isMicroop() ||
curStaticInst->isLastMicroop())) {
profileSimPoint();
}
Tick stall_ticks = 0;
if (simulate_inst_stalls && icache_access)
stall_ticks += icache_latency;
@ -572,6 +594,67 @@ AtomicSimpleCPU::printAddr(Addr a)
dcachePort.printAddr(a);
}
void
AtomicSimpleCPU::profileSimPoint()
{
if (!currentBBVInstCount)
currentBBV.first = thread->pcState().instAddr();
++intervalCount;
++currentBBVInstCount;
// If inst is control inst, assume end of basic block.
if (curStaticInst->isControl()) {
currentBBV.second = thread->pcState().instAddr();
auto map_itr = bbMap.find(currentBBV);
if (map_itr == bbMap.end()){
// If a new (previously unseen) basic block is found,
// add a new unique id, record num of insts and insert into bbMap.
BBInfo info;
info.id = bbMap.size() + 1;
info.insts = currentBBVInstCount;
info.count = currentBBVInstCount;
bbMap.insert(std::make_pair(currentBBV, info));
} else {
// If basic block is seen before, just increment the count by the
// number of insts in basic block.
BBInfo& info = map_itr->second;
assert(info.insts == currentBBVInstCount);
info.count += currentBBVInstCount;
}
currentBBVInstCount = 0;
// Reached end of interval if the sum of the current inst count
// (intervalCount) and the excessive inst count from the previous
// interval (intervalDrift) is greater than/equal to the interval size.
if (intervalCount + intervalDrift >= intervalSize) {
// summarize interval and display BBV info
std::vector<pair<uint64_t, uint64_t> > counts;
for (auto map_itr = bbMap.begin(); map_itr != bbMap.end();
++map_itr) {
BBInfo& info = map_itr->second;
if (info.count != 0) {
counts.push_back(std::make_pair(info.id, info.count));
info.count = 0;
}
}
std::sort(counts.begin(), counts.end());
// Print output BBV info
*simpointStream << "T";
for (auto cnt_itr = counts.begin(); cnt_itr != counts.end();
++cnt_itr) {
*simpointStream << ":" << cnt_itr->first
<< ":" << cnt_itr->second << " ";
}
*simpointStream << "\n";
intervalDrift = (intervalCount + intervalDrift) - intervalSize;
intervalCount = 0;
}
}
}
////////////////////////////////////////////////////////////////////////
//

View file

@ -43,9 +43,31 @@
#ifndef __CPU_SIMPLE_ATOMIC_HH__
#define __CPU_SIMPLE_ATOMIC_HH__
#include "base/hashmap.hh"
#include "cpu/simple/base.hh"
#include "params/AtomicSimpleCPU.hh"
/**
* Start and end address of basic block for SimPoint profiling.
* This structure is used to look up the hash table of BBVs.
* - first: PC of first inst in basic block
* - second: PC of last inst in basic block
*/
typedef std::pair<Addr, Addr> BasicBlockRange;
/**
 * Hash specialization that lets BasicBlockRange be used as a hash table key.
 * The start and end addresses are added together and the sum is hashed with
 * hash<Addr>.
 */
__hash_namespace_begin
template <>
class hash<BasicBlockRange>
{
  public:
    size_t
    operator()(const BasicBlockRange &bb) const
    {
        const Addr combined = bb.first + bb.second;
        return hash<Addr>()(combined);
    }
};
__hash_namespace_end
class AtomicSimpleCPU : public BaseSimpleCPU
{
public:
@ -161,6 +183,50 @@ class AtomicSimpleCPU : public BaseSimpleCPU
bool dcache_access;
Tick dcache_latency;
/**
* Profile basic blocks for SimPoints.
* Called at every macro inst to increment basic block inst counts and
* to profile block if end of block.
*/
void profileSimPoint();
/** Data structures for SimPoints BBV generation
* @{
*/
/** Whether SimPoint BBV profiling is enabled */
const bool simpoint;
/** SimPoint profiling interval size in instructions */
const uint64_t intervalSize;
/** Inst count in current interval */
uint64_t intervalCount;
/** Excess inst count from previous interval */
uint64_t intervalDrift;
/** Pointer to SimPoint BBV output stream */
std::ostream *simpointStream;
/**
 * Per-basic-block bookkeeping stored in bbMap, keyed by the block's
 * start/end address pair.
 */
struct BBInfo {
/** Unique ID, assigned when the block is first seen (starts at 1) */
uint64_t id;
/** Num of insts counted in the block when it was first seen */
uint64_t insts;
/**
 * Accumulated dynamic inst count executed by this block in the current
 * interval; reset to zero when the interval is written out
 */
uint64_t count;
};
/** Hash table containing all previously seen basic blocks */
m5::hash_map<BasicBlockRange, BBInfo> bbMap;
/** Currently executing basic block */
BasicBlockRange currentBBV;
/** inst count in current basic block */
uint64_t currentBBVInstCount;
/** @}
* End of data structures for SimPoints BBV generation
*/
protected:
/** Return a reference to the data port. */