diff --git a/configs/common/Options.py b/configs/common/Options.py index 0c651b501..474da94f4 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -50,6 +50,10 @@ def addCommonOptions(parser): parser.add_option("--caches", action="store_true") parser.add_option("--l2cache", action="store_true") parser.add_option("--fastmem", action="store_true") + parser.add_option("--simpoint-profile", action="store_true", + help="Enable basic block profiling for SimPoints") + parser.add_option("--simpoint-interval", type="int", default=10000000, + help="SimPoint interval in num of instructions") parser.add_option("--clock", action="store", type="string", default='2GHz') parser.add_option("--num-dirs", type="int", default=1) parser.add_option("--num-l2caches", type="int", default=1) diff --git a/configs/example/se.py b/configs/example/se.py index 20149cccd..a5f0204fd 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -166,6 +166,13 @@ if options.fastmem: if (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") +if options.simpoint_profile: + if not options.fastmem: + # Atomic CPU checked with fastmem option already + fatal("SimPoint generation should be done with atomic cpu and fastmem") + if np > 1: + fatal("SimPoint generation not supported with more than one CPUs") + for i in xrange(np): if options.smt: system.cpu[i].workload = multiprocesses @@ -177,6 +184,10 @@ for i in xrange(np): if options.fastmem: system.cpu[i].fastmem = True + if options.simpoint_profile: + system.cpu[i].simpoint_profile = True + system.cpu[i].simpoint_interval = options.simpoint_interval + if options.checker: system.cpu[i].addCheckerCpu() diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 5e1a0a961..f47838e83 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -187,6 +187,8 @@ class BaseCPU(MemObject): "terminate when all threads have reached this inst count") max_insts_any_thread = Param.Counter(0, "terminate when any thread reaches this inst count") + simpoint_start_insts = VectorParam.Counter([], + "starting instruction counts of simpoints") max_loads_all_threads = Param.Counter(0, "terminate when all threads have reached this load count") max_loads_any_thread = Param.Counter(0, diff --git a/src/cpu/base.cc b/src/cpu/base.cc index de0f8b23b..c7c1dadda 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -153,6 +153,18 @@ BaseCPU::BaseCPU(Params *p, bool is_checker) } } + // Set up instruction-count-based termination events for SimPoints + // Typically, there are more than one action points. + // Simulation.py is responsible to take the necessary actions upon + // exitting the simulation loop. + if (!p->simpoint_start_insts.empty()) { + const char *cause = "simpoint starting point found"; + for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i) { + Event *event = new SimLoopExitEvent(cause, 0); + comInstEventQueue[0]->schedule(event, p->simpoint_start_insts[i]); + } + } + if (p->max_insts_all_threads != 0) { const char *cause = "all threads reached the max instruction count"; diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py index c747582f6..7a066457d 100644 --- a/src/cpu/simple/AtomicSimpleCPU.py +++ b/src/cpu/simple/AtomicSimpleCPU.py @@ -61,3 +61,6 @@ class AtomicSimpleCPU(BaseSimpleCPU): simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles") simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles") fastmem = Param.Bool(False, "Access memory directly") + simpoint_profile = Param.Bool(False, "Generate SimPoint BBVs") + simpoint_interval = Param.UInt64(100000000, "SimPoint Interval Size (insts)") + simpoint_profile_file = Param.String("simpoint.bb.gz", "SimPoint BBV file") diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index d7c4190ee..1dd9675f9 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -44,6 +44,7 @@ #include "arch/mmapped_ipr.hh" #include "arch/utility.hh" #include "base/bigint.hh" +#include "base/output.hh" #include "config/the_isa.hh" #include "cpu/simple/atomic.hh" #include "cpu/exetrace.hh" @@ -109,9 +110,20 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) drain_manager(NULL), icachePort(name() + ".icache_port", this), dcachePort(name() + ".dcache_port", this), - fastmem(p->fastmem) + fastmem(p->fastmem), + simpoint(p->simpoint_profile), + intervalSize(p->simpoint_interval), + intervalCount(0), + intervalDrift(0), + simpointStream(NULL), + currentBBV(0, 0), + currentBBVInstCount(0) { _status = Idle; + + if (simpoint) { + simpointStream = simout.create(p->simpoint_profile_file, false); + } } @@ -120,6 +132,9 @@ AtomicSimpleCPU::~AtomicSimpleCPU() if (tickEvent.scheduled()) { deschedule(tickEvent); } + if (simpointStream) { + simout.close(simpointStream); + } } unsigned int @@ -534,6 +549,13 @@ AtomicSimpleCPU::tick() curStaticInst->isFirstMicroop())) instCnt++; + // profile for SimPoints if enabled and macro inst is finished + if (simpoint && curStaticInst && (fault == NoFault) && + (!curStaticInst->isMicroop() || + curStaticInst->isLastMicroop())) { + profileSimPoint(); + } + Tick stall_ticks = 0; if (simulate_inst_stalls && icache_access) stall_ticks += icache_latency; @@ -572,6 +594,67 @@ AtomicSimpleCPU::printAddr(Addr a) dcachePort.printAddr(a); } +void +AtomicSimpleCPU::profileSimPoint() +{ + if (!currentBBVInstCount) + currentBBV.first = thread->pcState().instAddr(); + + ++intervalCount; + ++currentBBVInstCount; + + // If inst is control inst, assume end of basic block. + if (curStaticInst->isControl()) { + currentBBV.second = thread->pcState().instAddr(); + + auto map_itr = bbMap.find(currentBBV); + if (map_itr == bbMap.end()){ + // If a new (previously unseen) basic block is found, + // add a new unique id, record num of insts and insert into bbMap. + BBInfo info; + info.id = bbMap.size() + 1; + info.insts = currentBBVInstCount; + info.count = currentBBVInstCount; + bbMap.insert(std::make_pair(currentBBV, info)); + } else { + // If basic block is seen before, just increment the count by the + // number of insts in basic block. + BBInfo& info = map_itr->second; + assert(info.insts == currentBBVInstCount); + info.count += currentBBVInstCount; + } + currentBBVInstCount = 0; + + // Reached end of interval if the sum of the current inst count + // (intervalCount) and the excessive inst count from the previous + // interval (intervalDrift) is greater than/equal to the interval size. + if (intervalCount + intervalDrift >= intervalSize) { + // summarize interval and display BBV info + std::vector > counts; + for (auto map_itr = bbMap.begin(); map_itr != bbMap.end(); + ++map_itr) { + BBInfo& info = map_itr->second; + if (info.count != 0) { + counts.push_back(std::make_pair(info.id, info.count)); + info.count = 0; + } + } + std::sort(counts.begin(), counts.end()); + + // Print output BBV info + *simpointStream << "T"; + for (auto cnt_itr = counts.begin(); cnt_itr != counts.end(); + ++cnt_itr) { + *simpointStream << ":" << cnt_itr->first + << ":" << cnt_itr->second << " "; + } + *simpointStream << "\n"; + + intervalDrift = (intervalCount + intervalDrift) - intervalSize; + intervalCount = 0; + } + } +} //////////////////////////////////////////////////////////////////////// // diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 9bb653bcc..5a9275a77 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -43,9 +43,31 @@ #ifndef __CPU_SIMPLE_ATOMIC_HH__ #define __CPU_SIMPLE_ATOMIC_HH__ +#include "base/hashmap.hh" #include "cpu/simple/base.hh" #include "params/AtomicSimpleCPU.hh" +/** + * Start and end address of basic block for SimPoint profiling. + * This structure is used to look up the hash table of BBVs. + * - first: PC of first inst in basic block + * - second: PC of last inst in basic block + */ +typedef std::pair BasicBlockRange; + +/** Overload hash function for BasicBlockRange type */ +__hash_namespace_begin +template <> +class hash +{ + public: + size_t operator()(const BasicBlockRange &bb) const { + return hash()(bb.first + bb.second); + } +}; +__hash_namespace_end + + class AtomicSimpleCPU : public BaseSimpleCPU { public: @@ -161,6 +183,50 @@ class AtomicSimpleCPU : public BaseSimpleCPU bool dcache_access; Tick dcache_latency; + /** + * Profile basic blocks for SimPoints. + * Called at every macro inst to increment basic block inst counts and + * to profile block if end of block. + */ + void profileSimPoint(); + + /** Data structures for SimPoints BBV generation + * @{ + */ + + /** Whether SimPoint BBV profiling is enabled */ + const bool simpoint; + /** SimPoint profiling interval size in instructions */ + const uint64_t intervalSize; + + /** Inst count in current basic block */ + uint64_t intervalCount; + /** Excess inst count from previous interval*/ + uint64_t intervalDrift; + /** Pointer to SimPoint BBV output stream */ + std::ostream *simpointStream; + + /** Basic Block information */ + struct BBInfo { + /** Unique ID */ + uint64_t id; + /** Num of static insts in BB */ + uint64_t insts; + /** Accumulated dynamic inst count executed by BB */ + uint64_t count; + }; + + /** Hash table containing all previously seen basic blocks */ + m5::hash_map bbMap; + /** Currently executing basic block */ + BasicBlockRange currentBBV; + /** inst count in current basic block */ + uint64_t currentBBVInstCount; + + /** @} + * End of data structures for SimPoints BBV generation + */ + protected: /** Return a reference to the data port. */