cpu: Adjust for trace offset and fix stats

This change subtracts the time offset present in the trace from
all the event times when nodes and requests are sent so that the
replay starts immediately when the simulation starts. This makes
the stats accurate when the time offset in traces is large, for
example when traces are generated in the middle of a workload
execution. It also solves the problem of unnecessary DRAM
refresh events that would keep occurring during the large time
offset before even a single request is replayed into the system.

Change-Id: Ie0898842615def867ffd5c219948386d952af7f7
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
This commit is contained in:
Radhika Jagtap 2016-09-15 18:01:16 +01:00
parent d7724d5f54
commit d067327fc0
2 changed files with 55 additions and 34 deletions

View file

@ -60,7 +60,7 @@ TraceCPU::TraceCPU(TraceCPUParams *params)
icacheNextEvent(this), icacheNextEvent(this),
dcacheNextEvent(this), dcacheNextEvent(this),
oneTraceComplete(false), oneTraceComplete(false),
firstFetchTick(0), traceOffset(0),
execCompleteEvent(nullptr) execCompleteEvent(nullptr)
{ {
// Increment static counter for number of Trace CPUs. // Increment static counter for number of Trace CPUs.
@ -116,22 +116,31 @@ TraceCPU::init()
BaseCPU::init(); BaseCPU::init();
// Get the send tick of the first instruction read request and schedule // Get the send tick of the first instruction read request
// icacheNextEvent at that tick.
Tick first_icache_tick = icacheGen.init(); Tick first_icache_tick = icacheGen.init();
schedule(icacheNextEvent, first_icache_tick);
// Get the send tick of the first data read/write request and schedule // Get the send tick of the first data read/write request
// dcacheNextEvent at that tick.
Tick first_dcache_tick = dcacheGen.init(); Tick first_dcache_tick = dcacheGen.init();
schedule(dcacheNextEvent, first_dcache_tick);
// Set the trace offset as the minimum of that in both traces
traceOffset = std::min(first_icache_tick, first_dcache_tick);
inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
name(), traceOffset);
// Schedule next icache and dcache event by subtracting the offset
schedule(icacheNextEvent, first_icache_tick - traceOffset);
schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
// Adjust the trace offset for the dcache generator's ready nodes
// We don't need to do this for the icache generator as it will
// send its first request at the first event and schedule subsequent
// events using a relative tick delta
dcacheGen.adjustInitTraceOffset(traceOffset);
// The static counter for number of Trace CPUs is correctly set at this // The static counter for number of Trace CPUs is correctly set at this
// point so create an event and pass it. // point so create an event and pass it.
execCompleteEvent = new CountedExitEvent("end of all traces reached.", execCompleteEvent = new CountedExitEvent("end of all traces reached.",
numTraceCPUs); numTraceCPUs);
// Save the first fetch request tick to dump it as tickOffset
firstFetchTick = first_icache_tick;
} }
void void
@ -164,6 +173,9 @@ TraceCPU::schedDcacheNext()
{ {
DPRINTF(TraceCPUData, "DcacheGen event.\n"); DPRINTF(TraceCPUData, "DcacheGen event.\n");
// Update stat for numCycles
numCycles = clockEdge() / clockPeriod();
dcacheGen.execute(); dcacheGen.execute();
if (dcacheGen.isExecComplete()) { if (dcacheGen.isExecComplete()) {
checkAndSchedExitEvent(); checkAndSchedExitEvent();
@ -179,11 +191,6 @@ TraceCPU::checkAndSchedExitEvent()
// Schedule event to indicate execution is complete as both // Schedule event to indicate execution is complete as both
// instruction and data access traces have been played back. // instruction and data access traces have been played back.
inform("%s: Execution complete.\n", name()); inform("%s: Execution complete.\n", name());
// Record stats which are computed at the end of simulation
tickOffset = firstFetchTick;
numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
numOps = dcacheGen.getMicroOpCount();
schedule(*execCompleteEvent, curTick()); schedule(*execCompleteEvent, curTick());
} }
} }
@ -216,11 +223,6 @@ TraceCPU::regStats()
; ;
cpi = numCycles/numOps; cpi = numCycles/numOps;
tickOffset
.name(name() + ".tickOffset")
.desc("The first execution tick for the root node of elastic traces")
;
icacheGen.regStats(); icacheGen.regStats();
dcacheGen.regStats(); dcacheGen.regStats();
} }
@ -311,6 +313,13 @@ TraceCPU::ElasticDataGen::init()
return (free_itr->execTick); return (free_itr->execTick);
} }
/**
 * Shift every node currently in the ready list earlier in time by
 * subtracting the given trace offset from its execTick.
 *
 * @param offset number of ticks to take away from each ready node's execTick
 */
void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) {
    for (auto node_itr = readyList.begin(); node_itr != readyList.end();
         ++node_itr) {
        node_itr->execTick -= offset;
    }
}
void void
TraceCPU::ElasticDataGen::exit() TraceCPU::ElasticDataGen::exit()
{ {
@ -534,6 +543,8 @@ TraceCPU::ElasticDataGen::execute()
hwResource.release(node_ptr); hwResource.release(node_ptr);
// clear the dynamically allocated set of dependents // clear the dynamically allocated set of dependents
(node_ptr->dependents).clear(); (node_ptr->dependents).clear();
// Update the stat for numOps simulated
owner.updateNumOps(node_ptr->robNum);
// delete node // delete node
delete node_ptr; delete node_ptr;
// remove from graph // remove from graph
@ -736,6 +747,8 @@ TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
// clear the dynamically allocated set of dependents // clear the dynamically allocated set of dependents
(node_ptr->dependents).clear(); (node_ptr->dependents).clear();
// Update the stat for numOps completed
owner.updateNumOps(node_ptr->robNum);
// delete node // delete node
delete node_ptr; delete node_ptr;
// remove from graph // remove from graph

View file

@ -136,10 +136,9 @@
* Strictly-ordered requests are skipped and the dependencies on such requests * Strictly-ordered requests are skipped and the dependencies on such requests
* are handled by simply marking them complete immediately. * are handled by simply marking them complete immediately.
* *
* The simulated seconds can be calculated as the difference between the * A CountedExitEvent that contains a static int belonging to the Trace CPU
* final_tick stat and the tickOffset stat. A CountedExitEvent that contains a * class as a down counter is used to implement multi Trace CPU simulation
* static int belonging to the Trace CPU class as a down counter is used to * exit.
* implement multi Trace CPU simulation exit.
*/ */
class TraceCPU : public BaseCPU class TraceCPU : public BaseCPU
@ -171,9 +170,15 @@ class TraceCPU : public BaseCPU
*/ */
Counter totalOps() const Counter totalOps() const
{ {
return dcacheGen.getMicroOpCount(); return numOps.value();
} }
/*
 * Record the number of ops simulated so far. The elastic data generator
 * calls this with the ROB number of a node each time it finishes
 * executing that node, and the value overwrites the numOps stat.
 */
void
updateNumOps(uint64_t rob_num)
{
    numOps = rob_num;
}
/* Pure virtual function in BaseCPU. Do nothing. */ /* Pure virtual function in BaseCPU. Do nothing. */
void wakeup(ThreadID tid = 0) void wakeup(ThreadID tid = 0)
{ {
@ -876,6 +881,14 @@ class TraceCPU : public BaseCPU
*/ */
Tick init(); Tick init();
/**
* Adjust traceOffset based on what TraceCPU init() determines on
* comparing the offsets in the fetch request and elastic traces.
* The offset is subtracted from the execTick of every node in the
* ready list.
*
* @param offset trace offset set by comparing both traces
*/
void adjustInitTraceOffset(Tick& offset);
/** Returns name of the ElasticDataGen instance. */ /** Returns name of the ElasticDataGen instance. */
const std::string& name() const { return genName; } const std::string& name() const { return genName; }
@ -1081,10 +1094,12 @@ class TraceCPU : public BaseCPU
bool oneTraceComplete; bool oneTraceComplete;
/** /**
* This is stores the tick of the first instruction fetch request * This stores the time offset in the trace, which is taken away from
* which is later used for dumping the tickOffset stat. * the ready times of requests. This is specially useful because the time
* offset can be very large if the traces are generated from the middle of
* a program.
*/ */
Tick firstFetchTick; Tick traceOffset;
/** /**
* Number of Trace CPUs in the system used as a shared variable and passed * Number of Trace CPUs in the system used as a shared variable and passed
@ -1109,13 +1124,6 @@ class TraceCPU : public BaseCPU
/** Stat for the CPI. This is really cycles per micro-op and not inst. */ /** Stat for the CPI. This is really cycles per micro-op and not inst. */
Stats::Formula cpi; Stats::Formula cpi;
/**
* The first execution tick is dumped as a stat so that the simulated
* seconds for a trace replay can be calculated as a difference between the
* final_tick stat and the tickOffset stat
*/
Stats::Scalar tickOffset;
public: public:
/** Used to get a reference to the icache port. */ /** Used to get a reference to the icache port. */