cpu: generate SimPoint basic block vector profiles
This patch is based on http://reviews.m5sim.org/r/1474/, originally written by Mitch Hayenga. Basic block vectors are generated (simpoint.bb.gz in the simout folder) based on the start and end addresses of basic blocks. Some review comments on the original patch are addressed, and hooks are added to create and resume from checkpoints based on instruction counts dictated by external SimPoint analysis tools. SimPoint creation/resuming options will be implemented in a separate patch.
This commit is contained in:
parent
121b15a54d
commit
2c1e344313
7 changed files with 182 additions and 1 deletions
|
@ -50,6 +50,10 @@ def addCommonOptions(parser):
|
|||
parser.add_option("--caches", action="store_true")
|
||||
parser.add_option("--l2cache", action="store_true")
|
||||
parser.add_option("--fastmem", action="store_true")
|
||||
parser.add_option("--simpoint-profile", action="store_true",
|
||||
help="Enable basic block profiling for SimPoints")
|
||||
parser.add_option("--simpoint-interval", type="int", default=10000000,
|
||||
help="SimPoint interval in num of instructions")
|
||||
parser.add_option("--clock", action="store", type="string", default='2GHz')
|
||||
parser.add_option("--num-dirs", type="int", default=1)
|
||||
parser.add_option("--num-l2caches", type="int", default=1)
|
||||
|
|
|
@ -166,6 +166,13 @@ if options.fastmem:
|
|||
if (options.caches or options.l2cache):
|
||||
fatal("You cannot use fastmem in combination with caches!")
|
||||
|
||||
if options.simpoint_profile:
|
||||
if not options.fastmem:
|
||||
# Atomic CPU checked with fastmem option already
|
||||
fatal("SimPoint generation should be done with atomic cpu and fastmem")
|
||||
if np > 1:
|
||||
fatal("SimPoint generation not supported with more than one CPUs")
|
||||
|
||||
for i in xrange(np):
|
||||
if options.smt:
|
||||
system.cpu[i].workload = multiprocesses
|
||||
|
@ -177,6 +184,10 @@ for i in xrange(np):
|
|||
if options.fastmem:
|
||||
system.cpu[i].fastmem = True
|
||||
|
||||
if options.simpoint_profile:
|
||||
system.cpu[i].simpoint_profile = True
|
||||
system.cpu[i].simpoint_interval = options.simpoint_interval
|
||||
|
||||
if options.checker:
|
||||
system.cpu[i].addCheckerCpu()
|
||||
|
||||
|
|
|
@ -187,6 +187,8 @@ class BaseCPU(MemObject):
|
|||
"terminate when all threads have reached this inst count")
|
||||
max_insts_any_thread = Param.Counter(0,
|
||||
"terminate when any thread reaches this inst count")
|
||||
simpoint_start_insts = VectorParam.Counter([],
|
||||
"starting instruction counts of simpoints")
|
||||
max_loads_all_threads = Param.Counter(0,
|
||||
"terminate when all threads have reached this load count")
|
||||
max_loads_any_thread = Param.Counter(0,
|
||||
|
|
|
@ -153,6 +153,18 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
|
|||
}
|
||||
}
|
||||
|
||||
// Set up instruction-count-based termination events for SimPoints
|
||||
// Typically, there is more than one action point.
|
||||
// Simulation.py is responsible to take the necessary actions upon
|
||||
// exiting the simulation loop.
|
||||
if (!p->simpoint_start_insts.empty()) {
|
||||
const char *cause = "simpoint starting point found";
|
||||
for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i) {
|
||||
Event *event = new SimLoopExitEvent(cause, 0);
|
||||
comInstEventQueue[0]->schedule(event, p->simpoint_start_insts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (p->max_insts_all_threads != 0) {
|
||||
const char *cause = "all threads reached the max instruction count";
|
||||
|
||||
|
|
|
@ -61,3 +61,6 @@ class AtomicSimpleCPU(BaseSimpleCPU):
|
|||
simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles")
|
||||
simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles")
|
||||
fastmem = Param.Bool(False, "Access memory directly")
|
||||
simpoint_profile = Param.Bool(False, "Generate SimPoint BBVs")
|
||||
simpoint_interval = Param.UInt64(100000000, "SimPoint Interval Size (insts)")
|
||||
simpoint_profile_file = Param.String("simpoint.bb.gz", "SimPoint BBV file")
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include "arch/mmapped_ipr.hh"
|
||||
#include "arch/utility.hh"
|
||||
#include "base/bigint.hh"
|
||||
#include "base/output.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/simple/atomic.hh"
|
||||
#include "cpu/exetrace.hh"
|
||||
|
@ -109,9 +110,20 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
|
|||
drain_manager(NULL),
|
||||
icachePort(name() + ".icache_port", this),
|
||||
dcachePort(name() + ".dcache_port", this),
|
||||
fastmem(p->fastmem)
|
||||
fastmem(p->fastmem),
|
||||
simpoint(p->simpoint_profile),
|
||||
intervalSize(p->simpoint_interval),
|
||||
intervalCount(0),
|
||||
intervalDrift(0),
|
||||
simpointStream(NULL),
|
||||
currentBBV(0, 0),
|
||||
currentBBVInstCount(0)
|
||||
{
|
||||
_status = Idle;
|
||||
|
||||
if (simpoint) {
|
||||
simpointStream = simout.create(p->simpoint_profile_file, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -120,6 +132,9 @@ AtomicSimpleCPU::~AtomicSimpleCPU()
|
|||
if (tickEvent.scheduled()) {
|
||||
deschedule(tickEvent);
|
||||
}
|
||||
if (simpointStream) {
|
||||
simout.close(simpointStream);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
|
@ -534,6 +549,13 @@ AtomicSimpleCPU::tick()
|
|||
curStaticInst->isFirstMicroop()))
|
||||
instCnt++;
|
||||
|
||||
// profile for SimPoints if enabled and macro inst is finished
|
||||
if (simpoint && curStaticInst && (fault == NoFault) &&
|
||||
(!curStaticInst->isMicroop() ||
|
||||
curStaticInst->isLastMicroop())) {
|
||||
profileSimPoint();
|
||||
}
|
||||
|
||||
Tick stall_ticks = 0;
|
||||
if (simulate_inst_stalls && icache_access)
|
||||
stall_ticks += icache_latency;
|
||||
|
@ -572,6 +594,67 @@ AtomicSimpleCPU::printAddr(Addr a)
|
|||
dcachePort.printAddr(a);
|
||||
}
|
||||
|
||||
void
|
||||
AtomicSimpleCPU::profileSimPoint()
|
||||
{
|
||||
if (!currentBBVInstCount)
|
||||
currentBBV.first = thread->pcState().instAddr();
|
||||
|
||||
++intervalCount;
|
||||
++currentBBVInstCount;
|
||||
|
||||
// If inst is control inst, assume end of basic block.
|
||||
if (curStaticInst->isControl()) {
|
||||
currentBBV.second = thread->pcState().instAddr();
|
||||
|
||||
auto map_itr = bbMap.find(currentBBV);
|
||||
if (map_itr == bbMap.end()){
|
||||
// If a new (previously unseen) basic block is found,
|
||||
// add a new unique id, record num of insts and insert into bbMap.
|
||||
BBInfo info;
|
||||
info.id = bbMap.size() + 1;
|
||||
info.insts = currentBBVInstCount;
|
||||
info.count = currentBBVInstCount;
|
||||
bbMap.insert(std::make_pair(currentBBV, info));
|
||||
} else {
|
||||
// If basic block is seen before, just increment the count by the
|
||||
// number of insts in basic block.
|
||||
BBInfo& info = map_itr->second;
|
||||
assert(info.insts == currentBBVInstCount);
|
||||
info.count += currentBBVInstCount;
|
||||
}
|
||||
currentBBVInstCount = 0;
|
||||
|
||||
// Reached end of interval if the sum of the current inst count
|
||||
// (intervalCount) and the excessive inst count from the previous
|
||||
// interval (intervalDrift) is greater than/equal to the interval size.
|
||||
if (intervalCount + intervalDrift >= intervalSize) {
|
||||
// summarize interval and display BBV info
|
||||
std::vector<pair<uint64_t, uint64_t> > counts;
|
||||
for (auto map_itr = bbMap.begin(); map_itr != bbMap.end();
|
||||
++map_itr) {
|
||||
BBInfo& info = map_itr->second;
|
||||
if (info.count != 0) {
|
||||
counts.push_back(std::make_pair(info.id, info.count));
|
||||
info.count = 0;
|
||||
}
|
||||
}
|
||||
std::sort(counts.begin(), counts.end());
|
||||
|
||||
// Print output BBV info
|
||||
*simpointStream << "T";
|
||||
for (auto cnt_itr = counts.begin(); cnt_itr != counts.end();
|
||||
++cnt_itr) {
|
||||
*simpointStream << ":" << cnt_itr->first
|
||||
<< ":" << cnt_itr->second << " ";
|
||||
}
|
||||
*simpointStream << "\n";
|
||||
|
||||
intervalDrift = (intervalCount + intervalDrift) - intervalSize;
|
||||
intervalCount = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
|
|
@ -43,9 +43,31 @@
|
|||
#ifndef __CPU_SIMPLE_ATOMIC_HH__
|
||||
#define __CPU_SIMPLE_ATOMIC_HH__
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "cpu/simple/base.hh"
|
||||
#include "params/AtomicSimpleCPU.hh"
|
||||
|
||||
/**
|
||||
* Start and end address of basic block for SimPoint profiling.
|
||||
* This structure is used to look up the hash table of BBVs.
|
||||
* - first: PC of first inst in basic block
|
||||
* - second: PC of last inst in basic block
|
||||
*/
|
||||
typedef std::pair<Addr, Addr> BasicBlockRange;
|
||||
|
||||
/** Overload hash function for BasicBlockRange type */
|
||||
__hash_namespace_begin
|
||||
template <>
|
||||
class hash<BasicBlockRange>
|
||||
{
|
||||
public:
|
||||
size_t operator()(const BasicBlockRange &bb) const {
|
||||
return hash<Addr>()(bb.first + bb.second);
|
||||
}
|
||||
};
|
||||
__hash_namespace_end
|
||||
|
||||
|
||||
class AtomicSimpleCPU : public BaseSimpleCPU
|
||||
{
|
||||
public:
|
||||
|
@ -161,6 +183,50 @@ class AtomicSimpleCPU : public BaseSimpleCPU
|
|||
bool dcache_access;
|
||||
Tick dcache_latency;
|
||||
|
||||
/**
|
||||
* Profile basic blocks for SimPoints.
|
||||
* Called at every macro inst to increment basic block inst counts and
|
||||
* to profile block if end of block.
|
||||
*/
|
||||
void profileSimPoint();
|
||||
|
||||
/** Data structures for SimPoints BBV generation
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** Whether SimPoint BBV profiling is enabled */
|
||||
const bool simpoint;
|
||||
/** SimPoint profiling interval size in instructions */
|
||||
const uint64_t intervalSize;
|
||||
|
||||
/** Inst count in current interval */
|
||||
uint64_t intervalCount;
|
||||
/** Excess inst count from previous interval*/
|
||||
uint64_t intervalDrift;
|
||||
/** Pointer to SimPoint BBV output stream */
|
||||
std::ostream *simpointStream;
|
||||
|
||||
/**
 * Bookkeeping record kept for each basic block seen during SimPoint
 * profiling; stored as the mapped value of the basic block hash table.
 */
struct BBInfo
{
    /** Unique ID of the basic block */
    uint64_t id;

    /** Number of static instructions in the basic block */
    uint64_t insts;

    /** Dynamic instruction count accumulated by the basic block */
    uint64_t count;
};
|
||||
|
||||
/** Hash table containing all previously seen basic blocks */
|
||||
m5::hash_map<BasicBlockRange, BBInfo> bbMap;
|
||||
/** Currently executing basic block */
|
||||
BasicBlockRange currentBBV;
|
||||
/** inst count in current basic block */
|
||||
uint64_t currentBBVInstCount;
|
||||
|
||||
/** @}
|
||||
* End of data structures for SimPoints BBV generation
|
||||
*/
|
||||
|
||||
protected:
|
||||
|
||||
/** Return a reference to the data port. */
|
||||
|
|
Loading…
Reference in a new issue