From 2c1e34431326381833de289b1d90f2427ba16c98 Mon Sep 17 00:00:00 2001 From: Dam Sunwoo Date: Mon, 22 Apr 2013 13:20:31 -0400 Subject: [PATCH] cpu: generate SimPoint basic block vector profiles This patch is based on http://reviews.m5sim.org/r/1474/ originally written by Mitch Hayenga. Basic block vectors are generated (simpoint.bb.gz in simout folder) based on start and end addresses of basic blocks. Some comments to the original patch are addressed and hooks are added to create and resume from checkpoints based on instruction counts dictated by external SimPoint analysis tools. SimPoint creation/resuming options will be implemented as a separate patch. --- configs/common/Options.py | 4 ++ configs/example/se.py | 11 ++++ src/cpu/BaseCPU.py | 2 + src/cpu/base.cc | 12 +++++ src/cpu/simple/AtomicSimpleCPU.py | 3 ++ src/cpu/simple/atomic.cc | 85 ++++++++++++++++++++++++++++++- src/cpu/simple/atomic.hh | 66 ++++++++++++++++++++++++ 7 files changed, 182 insertions(+), 1 deletion(-) diff --git a/configs/common/Options.py b/configs/common/Options.py index 0c651b501..474da94f4 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -50,6 +50,10 @@ def addCommonOptions(parser): parser.add_option("--caches", action="store_true") parser.add_option("--l2cache", action="store_true") parser.add_option("--fastmem", action="store_true") + parser.add_option("--simpoint-profile", action="store_true", + help="Enable basic block profiling for SimPoints") + parser.add_option("--simpoint-interval", type="int", default=10000000, + help="SimPoint interval in num of instructions") parser.add_option("--clock", action="store", type="string", default='2GHz') parser.add_option("--num-dirs", type="int", default=1) parser.add_option("--num-l2caches", type="int", default=1) diff --git a/configs/example/se.py b/configs/example/se.py index 20149cccd..a5f0204fd 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -166,6 +166,13 @@ if options.fastmem: if (options.caches 
or options.l2cache): fatal("You cannot use fastmem in combination with caches!") +if options.simpoint_profile: + if not options.fastmem: + # Atomic CPU checked with fastmem option already + fatal("SimPoint generation should be done with atomic cpu and fastmem") + if np > 1: + fatal("SimPoint generation not supported with more than one CPUs") + for i in xrange(np): if options.smt: system.cpu[i].workload = multiprocesses @@ -177,6 +184,10 @@ for i in xrange(np): if options.fastmem: system.cpu[i].fastmem = True + if options.simpoint_profile: + system.cpu[i].simpoint_profile = True + system.cpu[i].simpoint_interval = options.simpoint_interval + if options.checker: system.cpu[i].addCheckerCpu() diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 5e1a0a961..f47838e83 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -187,6 +187,8 @@ class BaseCPU(MemObject): "terminate when all threads have reached this inst count") max_insts_any_thread = Param.Counter(0, "terminate when any thread reaches this inst count") + simpoint_start_insts = VectorParam.Counter([], + "starting instruction counts of simpoints") max_loads_all_threads = Param.Counter(0, "terminate when all threads have reached this load count") max_loads_any_thread = Param.Counter(0, diff --git a/src/cpu/base.cc b/src/cpu/base.cc index de0f8b23b..c7c1dadda 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -153,6 +153,18 @@ BaseCPU::BaseCPU(Params *p, bool is_checker) } } + // Set up instruction-count-based termination events for SimPoints + // Typically, there is more than one action point. + // Simulation.py is responsible for taking the necessary actions upon + // exiting the simulation loop.
+ if (!p->simpoint_start_insts.empty()) { + const char *cause = "simpoint starting point found"; + for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i) { + Event *event = new SimLoopExitEvent(cause, 0); + comInstEventQueue[0]->schedule(event, p->simpoint_start_insts[i]); + } + } + if (p->max_insts_all_threads != 0) { const char *cause = "all threads reached the max instruction count"; diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py index c747582f6..7a066457d 100644 --- a/src/cpu/simple/AtomicSimpleCPU.py +++ b/src/cpu/simple/AtomicSimpleCPU.py @@ -61,3 +61,6 @@ class AtomicSimpleCPU(BaseSimpleCPU): simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles") simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles") fastmem = Param.Bool(False, "Access memory directly") + simpoint_profile = Param.Bool(False, "Generate SimPoint BBVs") + simpoint_interval = Param.UInt64(100000000, "SimPoint Interval Size (insts)") + simpoint_profile_file = Param.String("simpoint.bb.gz", "SimPoint BBV file") diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index d7c4190ee..1dd9675f9 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -44,6 +44,7 @@ #include "arch/mmapped_ipr.hh" #include "arch/utility.hh" #include "base/bigint.hh" +#include "base/output.hh" #include "config/the_isa.hh" #include "cpu/simple/atomic.hh" #include "cpu/exetrace.hh" @@ -109,9 +110,20 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) drain_manager(NULL), icachePort(name() + ".icache_port", this), dcachePort(name() + ".dcache_port", this), - fastmem(p->fastmem) + fastmem(p->fastmem), + simpoint(p->simpoint_profile), + intervalSize(p->simpoint_interval), + intervalCount(0), + intervalDrift(0), + simpointStream(NULL), + currentBBV(0, 0), + currentBBVInstCount(0) { _status = Idle; + + if (simpoint) { + simpointStream = simout.create(p->simpoint_profile_file, false); + } } @@ -120,6 +132,9 @@ 
AtomicSimpleCPU::~AtomicSimpleCPU() if (tickEvent.scheduled()) { deschedule(tickEvent); } + if (simpointStream) { + simout.close(simpointStream); + } } unsigned int @@ -534,6 +549,13 @@ AtomicSimpleCPU::tick() curStaticInst->isFirstMicroop())) instCnt++; + // profile for SimPoints if enabled and macro inst is finished + if (simpoint && curStaticInst && (fault == NoFault) && + (!curStaticInst->isMicroop() || + curStaticInst->isLastMicroop())) { + profileSimPoint(); + } + Tick stall_ticks = 0; if (simulate_inst_stalls && icache_access) stall_ticks += icache_latency; @@ -572,6 +594,67 @@ AtomicSimpleCPU::printAddr(Addr a) dcachePort.printAddr(a); } +void +AtomicSimpleCPU::profileSimPoint() +{ + if (!currentBBVInstCount) + currentBBV.first = thread->pcState().instAddr(); + + ++intervalCount; + ++currentBBVInstCount; + + // If inst is control inst, assume end of basic block. + if (curStaticInst->isControl()) { + currentBBV.second = thread->pcState().instAddr(); + + auto map_itr = bbMap.find(currentBBV); + if (map_itr == bbMap.end()){ + // If a new (previously unseen) basic block is found, + // add a new unique id, record num of insts and insert into bbMap. + BBInfo info; + info.id = bbMap.size() + 1; + info.insts = currentBBVInstCount; + info.count = currentBBVInstCount; + bbMap.insert(std::make_pair(currentBBV, info)); + } else { + // If basic block is seen before, just increment the count by the + // number of insts in basic block. + BBInfo& info = map_itr->second; + assert(info.insts == currentBBVInstCount); + info.count += currentBBVInstCount; + } + currentBBVInstCount = 0; + + // Reached end of interval if the sum of the current inst count + // (intervalCount) and the excessive inst count from the previous + // interval (intervalDrift) is greater than/equal to the interval size. 
+ if (intervalCount + intervalDrift >= intervalSize) { + // summarize interval and display BBV info + std::vector<std::pair<uint64_t, uint64_t> > counts; + for (auto map_itr = bbMap.begin(); map_itr != bbMap.end(); + ++map_itr) { + BBInfo& info = map_itr->second; + if (info.count != 0) { + counts.push_back(std::make_pair(info.id, info.count)); + info.count = 0; + } + } + std::sort(counts.begin(), counts.end()); + + // Print output BBV info + *simpointStream << "T"; + for (auto cnt_itr = counts.begin(); cnt_itr != counts.end(); + ++cnt_itr) { + *simpointStream << ":" << cnt_itr->first + << ":" << cnt_itr->second << " "; + } + *simpointStream << "\n"; + + intervalDrift = (intervalCount + intervalDrift) - intervalSize; + intervalCount = 0; + } + } +} //////////////////////////////////////////////////////////////////////// // diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 9bb653bcc..5a9275a77 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -43,9 +43,31 @@ #ifndef __CPU_SIMPLE_ATOMIC_HH__ #define __CPU_SIMPLE_ATOMIC_HH__ +#include "base/hashmap.hh" #include "cpu/simple/base.hh" #include "params/AtomicSimpleCPU.hh" +/** + * Start and end address of basic block for SimPoint profiling. + * This structure is used to look up the hash table of BBVs. + * - first: PC of first inst in basic block + * - second: PC of last inst in basic block + */ +typedef std::pair<Addr, Addr> BasicBlockRange; + +/** Overload hash function for BasicBlockRange type */ +__hash_namespace_begin +template <> +class hash<BasicBlockRange> +{ + public: + size_t operator()(const BasicBlockRange &bb) const { + return hash<Addr>()(bb.first + bb.second); + } +}; +__hash_namespace_end + + class AtomicSimpleCPU : public BaseSimpleCPU { public: @@ -161,6 +183,50 @@ class AtomicSimpleCPU : public BaseSimpleCPU bool dcache_access; Tick dcache_latency; + /** + * Profile basic blocks for SimPoints. + * Called at every macro inst to increment basic block inst counts and + * to profile block if end of block.
+ */ + void profileSimPoint(); + + /** Data structures for SimPoints BBV generation + * @{ + */ + + /** Whether SimPoint BBV profiling is enabled */ + const bool simpoint; + /** SimPoint profiling interval size in instructions */ + const uint64_t intervalSize; + + /** Inst count in current interval */ + uint64_t intervalCount; + /** Excess inst count from previous interval */ + uint64_t intervalDrift; + /** Pointer to SimPoint BBV output stream */ + std::ostream *simpointStream; + + /** Basic Block information */ + struct BBInfo { + /** Unique ID */ + uint64_t id; + /** Num of static insts in BB */ + uint64_t insts; + /** Accumulated dynamic inst count executed by BB */ + uint64_t count; + }; + + /** Hash table containing all previously seen basic blocks */ + m5::hash_map<BasicBlockRange, BBInfo> bbMap; + /** Currently executing basic block */ + BasicBlockRange currentBBV; + /** Inst count in current basic block */ + uint64_t currentBBVInstCount; + + /** @} + * End of data structures for SimPoints BBV generation + */ + protected: /** Return a reference to the data port. */