54519fd51f
This patch adds support to optionally capture the virtual address and asid for load/store instructions in the elastic traces. If they are present in the traces, Trace CPU will set those fields of the request during replay.
951 lines
39 KiB
C++
951 lines
39 KiB
C++
/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */
|
|
|
|
#include "cpu/o3/probe/elastic_trace.hh"
|
|
|
|
#include "base/callback.hh"
|
|
#include "base/output.hh"
|
|
#include "base/trace.hh"
|
|
#include "cpu/reg_class.hh"
|
|
#include "debug/ElasticTrace.hh"
|
|
#include "mem/packet.hh"
|
|
|
|
// Construct the elastic trace probe listener.
//
// Validates the configuration (the probe manager must be an O3 CPU running
// a single thread, the dependency window must be non-zero and both trace
// file names must be set), opens the instruction fetch and data dependency
// protobuf streams, writes a header to each and registers an exit callback
// that invokes flushTraces().
ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
    :  ProbeListenerObject(params),
       regEtraceListenersEvent(this),
       firstWin(true),
       lastClearedSeqNum(0),
       depWindowSize(params->depWindowSize),
       dataTraceStream(nullptr),
       instTraceStream(nullptr),
       startTraceInst(params->startTraceInst),
       allProbesReg(false),
       traceVirtAddr(params->traceVirtAddr)
{
    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "
             "support dependency tracing.\n", name());

    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "
             "Recommended size is 3x ROB size in the O3CPU.\n");

    // Note the trailing space after "for": the two literals are
    // concatenated by the compiler.
    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "
             "single-threaded workload only", cpu->numThreads, name());

    // Initialize the protobuf output streams
    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "
             "trace file path to instFetchTraceFile");
    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "
             "trace file path to dataDepTraceFile");
    std::string filename = simout.resolve(name() + "." +
                                          params->instFetchTraceFile);
    instTraceStream = new ProtoOutputStream(filename);
    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
    dataTraceStream = new ProtoOutputStream(filename);

    // Create a protobuf message for the instruction fetch trace header and
    // write it to the stream
    ProtoMessage::PacketHeader inst_pkt_header;
    inst_pkt_header.set_obj_id(name());
    inst_pkt_header.set_tick_freq(SimClock::Frequency);
    instTraceStream->write(inst_pkt_header);

    // Create a protobuf message for the data dependency trace header and
    // write it to the stream
    ProtoMessage::InstDepRecordHeader data_rec_header;
    data_rec_header.set_obj_id(name());
    data_rec_header.set_tick_freq(SimClock::Frequency);
    data_rec_header.set_window_size(depWindowSize);
    dataTraceStream->write(data_rec_header);

    // Register a callback to flush trace records and close the output streams.
    Callback* cb = new MakeCallback<ElasticTrace,
        &ElasticTrace::flushTraces>(this);
    registerExitCallback(cb);
}
|
|
|
|
void
|
|
ElasticTrace::regProbeListeners()
|
|
{
|
|
inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
|
|
curTick(), startTraceInst);
|
|
if (startTraceInst == 0) {
|
|
// If we want to start tracing from the start of the simulation,
|
|
// register all elastic trace probes now.
|
|
regEtraceListeners();
|
|
} else {
|
|
// Schedule an event to register all elastic trace probes when
|
|
// specified no. of instructions are committed.
|
|
cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent,
|
|
startTraceInst);
|
|
}
|
|
}
|
|
|
|
void
|
|
ElasticTrace::regEtraceListeners()
|
|
{
|
|
assert(!allProbesReg);
|
|
inform("@%llu: No. of instructions committed = %llu, registering elastic"
|
|
" probe listeners", curTick(), cpu->numSimulatedInsts());
|
|
// Create new listeners: provide method to be called upon a notify() for
|
|
// each probe point.
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
|
|
"FetchRequest", &ElasticTrace::fetchReqTrace));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
|
|
"Execute", &ElasticTrace::recordExecTick));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
|
|
"ToCommit", &ElasticTrace::recordToCommTick));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
|
|
"Rename", &ElasticTrace::updateRegDep));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
|
|
"SquashInRename", &ElasticTrace::removeRegDepMapEntry));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
|
|
"Squash", &ElasticTrace::addSquashedInst));
|
|
listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this,
|
|
"Commit", &ElasticTrace::addCommittedInst));
|
|
allProbesReg = true;
|
|
}
|
|
|
|
void
|
|
ElasticTrace::fetchReqTrace(const RequestPtr &req)
|
|
{
|
|
|
|
DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
|
|
(MemCmd::ReadReq),
|
|
req->getPC(), req->getVaddr(), req->getPaddr(),
|
|
req->getFlags(), req->getSize(), curTick());
|
|
|
|
// Create a protobuf message including the request fields necessary to
|
|
// recreate the request in the TraceCPU.
|
|
ProtoMessage::Packet inst_fetch_pkt;
|
|
inst_fetch_pkt.set_tick(curTick());
|
|
inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
|
|
inst_fetch_pkt.set_pc(req->getPC());
|
|
inst_fetch_pkt.set_flags(req->getFlags());
|
|
inst_fetch_pkt.set_addr(req->getPaddr());
|
|
inst_fetch_pkt.set_size(req->getSize());
|
|
// Write the message to the stream.
|
|
instTraceStream->write(inst_fetch_pkt);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst)
|
|
{
|
|
|
|
// In a corner case, a retired instruction is propagated backward to the
|
|
// IEW instruction queue to handle some side-channel information. But we
|
|
// must not process an instruction again. So we test the sequence number
|
|
// against the lastClearedSeqNum and skip adding the instruction for such
|
|
// corner cases.
|
|
if (dyn_inst->seqNum <= lastClearedSeqNum) {
|
|
DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
|
|
has already retired (mostly squashed)", dyn_inst->seqNum);
|
|
// Do nothing as program has proceeded and this inst has been
|
|
// propagated backwards to handle something.
|
|
return;
|
|
}
|
|
|
|
DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
|
|
curTick());
|
|
// Either the execution info object will already exist if this
|
|
// instruction had a register dependency recorded in the rename probe
|
|
// listener before entering execute stage or it will not exist and will
|
|
// need to be created here.
|
|
InstExecInfo* exec_info_ptr;
|
|
auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
|
|
if (itr_exec_info != tempStore.end()) {
|
|
exec_info_ptr = itr_exec_info->second;
|
|
} else {
|
|
exec_info_ptr = new InstExecInfo;
|
|
tempStore[dyn_inst->seqNum] = exec_info_ptr;
|
|
}
|
|
|
|
exec_info_ptr->executeTick = curTick();
|
|
maxTempStoreSize = std::max(tempStore.size(),
|
|
(std::size_t)maxTempStoreSize.value());
|
|
}
|
|
|
|
void
|
|
ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst)
|
|
{
|
|
// If tracing has just been enabled then the instruction at this stage of
|
|
// execution is far enough that we cannot gather info about its past like
|
|
// the tick it started execution. Simply return until we see an instruction
|
|
// that is found in the tempStore.
|
|
auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
|
|
if (itr_exec_info == tempStore.end()) {
|
|
DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
|
|
" skipping.\n", dyn_inst->seqNum);
|
|
return;
|
|
}
|
|
|
|
DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
|
|
curTick());
|
|
InstExecInfo* exec_info_ptr = itr_exec_info->second;
|
|
exec_info_ptr->toCommitTick = curTick();
|
|
|
|
}
|
|
|
|
void
|
|
ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst)
|
|
{
|
|
// Get the sequence number of the instruction
|
|
InstSeqNum seq_num = dyn_inst->seqNum;
|
|
|
|
assert(dyn_inst->seqNum > lastClearedSeqNum);
|
|
|
|
// Since this is the first probe activated in the pipeline, create
|
|
// a new execution info object to track this instruction as it
|
|
// progresses through the pipeline.
|
|
InstExecInfo* exec_info_ptr = new InstExecInfo;
|
|
tempStore[seq_num] = exec_info_ptr;
|
|
|
|
// Loop through the source registers and look up the dependency map. If
|
|
// the source register entry is found in the dependency map, add a
|
|
// dependency on the last writer.
|
|
int8_t max_regs = dyn_inst->numSrcRegs();
|
|
for (int src_idx = 0; src_idx < max_regs; src_idx++) {
|
|
// Get the physical register index of the i'th source register.
|
|
PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
|
|
DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num,
|
|
src_reg);
|
|
auto itr_last_writer = physRegDepMap.find(src_reg);
|
|
if (itr_last_writer != physRegDepMap.end()) {
|
|
InstSeqNum last_writer = itr_last_writer->second;
|
|
// Additionally the dependency distance is kept less than the window
|
|
// size parameter to limit the memory allocation to nodes in the
|
|
// graph. If the window were tending to infinite we would have to
|
|
// load a large number of node objects during replay.
|
|
if (seq_num - last_writer < depWindowSize) {
|
|
// Record a physical register dependency.
|
|
exec_info_ptr->physRegDepSet.insert(last_writer);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Loop through the destination registers of this instruction and update
|
|
// the physical register dependency map for last writers to registers.
|
|
max_regs = dyn_inst->numDestRegs();
|
|
for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
|
|
// For data dependency tracking the register must be an int, float or
|
|
// CC register and not a Misc register.
|
|
TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx);
|
|
if (regIdxToClass(dest_reg) != MiscRegClass) {
|
|
// Get the physical register index of the i'th destination register.
|
|
dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
|
|
if (dest_reg != TheISA::ZeroReg) {
|
|
DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n",
|
|
seq_num, dest_reg);
|
|
physRegDepMap[dest_reg] = seq_num;
|
|
}
|
|
}
|
|
}
|
|
maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
|
|
(std::size_t)maxPhysRegDepMapSize.value());
|
|
}
|
|
|
|
void
|
|
ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
|
|
{
|
|
DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
|
|
inst_reg_pair.second);
|
|
auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
|
|
if (itr_regdep_map != physRegDepMap.end())
|
|
physRegDepMap.erase(itr_regdep_map);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::addSquashedInst(const DynInstPtr &head_inst)
|
|
{
|
|
// If the squashed instruction was squashed before being processed by
|
|
// execute stage then it will not be in the temporary store. In this case
|
|
// do nothing and return.
|
|
auto itr_exec_info = tempStore.find(head_inst->seqNum);
|
|
if (itr_exec_info == tempStore.end())
|
|
return;
|
|
|
|
// If there is a squashed load for which a read request was
|
|
// sent before it got squashed then add it to the trace.
|
|
DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
|
|
head_inst->seqNum);
|
|
// Get pointer to the execution info object corresponding to the inst.
|
|
InstExecInfo* exec_info_ptr = itr_exec_info->second;
|
|
if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
|
|
exec_info_ptr->toCommitTick != MaxTick &&
|
|
head_inst->hasRequest() &&
|
|
head_inst->getFault() == NoFault) {
|
|
// Add record to depTrace with commit parameter as false.
|
|
addDepTraceRecord(head_inst, exec_info_ptr, false);
|
|
}
|
|
// As the information contained is no longer needed, remove the execution
|
|
// info object from the temporary store.
|
|
clearTempStoreUntil(head_inst);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::addCommittedInst(const DynInstPtr &head_inst)
|
|
{
|
|
DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
|
|
head_inst->seqNum);
|
|
|
|
// Add the instruction to the depTrace.
|
|
if (!head_inst->isNop()) {
|
|
|
|
// If tracing has just been enabled then the instruction at this stage
|
|
// of execution is far enough that we cannot gather info about its past
|
|
// like the tick it started execution. Simply return until we see an
|
|
// instruction that is found in the tempStore.
|
|
auto itr_temp_store = tempStore.find(head_inst->seqNum);
|
|
if (itr_temp_store == tempStore.end()) {
|
|
DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
|
|
"store, skipping.\n", head_inst->seqNum);
|
|
return;
|
|
}
|
|
|
|
// Get pointer to the execution info object corresponding to the inst.
|
|
InstExecInfo* exec_info_ptr = itr_temp_store->second;
|
|
assert(exec_info_ptr->executeTick != MaxTick);
|
|
assert(exec_info_ptr->toCommitTick != MaxTick);
|
|
|
|
// Check if the instruction had a fault, if it predicated false and
|
|
// thus previous register values were restored or if it was a
|
|
// load/store that did not have a request (e.g. when the size of the
|
|
// request is zero). In all these cases the instruction is set as
|
|
// executed and is picked up by the commit probe listener. But a
|
|
// request is not issued and registers are not written. So practically,
|
|
// skipping these should not hurt as execution would not stall on them.
|
|
// Alternatively, these could be included merely as a compute node in
|
|
// the graph. Removing these for now. If correlation accuracy needs to
|
|
// be improved in future these can be turned into comp nodes at the
|
|
// cost of bigger traces.
|
|
if (head_inst->getFault() != NoFault) {
|
|
DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
|
|
"skip adding it to the trace\n",
|
|
(head_inst->isMemRef() ? "Load/store" : "Comp inst."),
|
|
head_inst->seqNum);
|
|
} else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
|
|
DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
|
|
"skip adding it to the trace\n", head_inst->seqNum);
|
|
} else if (!head_inst->readPredicate()) {
|
|
DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
|
|
"skip adding it to the trace\n",
|
|
(head_inst->isMemRef() ? "Load/store" : "Comp inst."),
|
|
head_inst->seqNum);
|
|
} else {
|
|
// Add record to depTrace with commit parameter as true.
|
|
addDepTraceRecord(head_inst, exec_info_ptr, true);
|
|
}
|
|
}
|
|
// As the information contained is no longer needed, remove the execution
|
|
// info object from the temporary store.
|
|
clearTempStoreUntil(head_inst);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst,
|
|
InstExecInfo* exec_info_ptr, bool commit)
|
|
{
|
|
// Create a record to assign dynamic intruction related fields.
|
|
TraceInfo* new_record = new TraceInfo;
|
|
// Add to map for sequence number look up to retrieve the TraceInfo pointer
|
|
traceInfoMap[head_inst->seqNum] = new_record;
|
|
|
|
// Assign fields from the instruction
|
|
new_record->instNum = head_inst->seqNum;
|
|
new_record->commit = commit;
|
|
new_record->type = head_inst->isLoad() ? Record::LOAD :
|
|
(head_inst->isStore() ? Record::STORE :
|
|
Record::COMP);
|
|
|
|
// Assign fields for creating a request in case of a load/store
|
|
new_record->reqFlags = head_inst->memReqFlags;
|
|
new_record->virtAddr = head_inst->effAddr;
|
|
new_record->asid = head_inst->asid;
|
|
new_record->physAddr = head_inst->physEffAddrLow;
|
|
// Currently the tracing does not support split requests.
|
|
new_record->size = head_inst->effSize;
|
|
new_record->pc = head_inst->instAddr();
|
|
|
|
// Assign the timing information stored in the execution info object
|
|
new_record->executeTick = exec_info_ptr->executeTick;
|
|
new_record->toCommitTick = exec_info_ptr->toCommitTick;
|
|
new_record->commitTick = curTick();
|
|
|
|
// Assign initial values for number of dependents and computational delay
|
|
new_record->numDepts = 0;
|
|
new_record->compDelay = -1;
|
|
|
|
// The physical register dependency set of the first instruction is
|
|
// empty. Since there are no records in the depTrace at this point, the
|
|
// case of adding an ROB dependency by using a reverse iterator is not
|
|
// applicable. Thus, populate the fields of the record corresponding to the
|
|
// first instruction and return.
|
|
if (depTrace.empty()) {
|
|
// Store the record in depTrace.
|
|
depTrace.push_back(new_record);
|
|
DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
|
|
new_record->instNum);
|
|
return;
|
|
}
|
|
|
|
// Clear register dependencies for squashed loads as they may be dependent
|
|
// on squashed instructions and we do not add those to the trace.
|
|
if (head_inst->isLoad() && !commit) {
|
|
(exec_info_ptr->physRegDepSet).clear();
|
|
}
|
|
|
|
// Assign the register dependencies stored in the execution info object
|
|
std::set<InstSeqNum>::const_iterator dep_set_it;
|
|
for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
|
|
dep_set_it != (exec_info_ptr->physRegDepSet).end();
|
|
++dep_set_it) {
|
|
auto trace_info_itr = traceInfoMap.find(*dep_set_it);
|
|
if (trace_info_itr != traceInfoMap.end()) {
|
|
// The register dependency is valid. Assign it and calculate
|
|
// computational delay
|
|
new_record->physRegDepList.push_back(*dep_set_it);
|
|
DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
|
|
"%lli\n", new_record->instNum, *dep_set_it);
|
|
TraceInfo* reg_dep = trace_info_itr->second;
|
|
reg_dep->numDepts++;
|
|
compDelayPhysRegDep(reg_dep, new_record);
|
|
++numRegDep;
|
|
} else {
|
|
// The instruction that this has a register dependency on was
|
|
// not added to the trace because of one of the following
|
|
// 1. it was an instruction that had a fault
|
|
// 2. it was an instruction that was predicated false and
|
|
// previous register values were restored
|
|
// 3. it was load/store that did not have a request (e.g. when
|
|
// the size of the request is zero but this may not be a fault)
|
|
// In all these cases the instruction is set as executed and is
|
|
// picked up by the commit probe listener. But a request is not
|
|
// issued and registers are not written to in these cases.
|
|
DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
|
|
"%lli is skipped\n",new_record->instNum, *dep_set_it);
|
|
}
|
|
}
|
|
|
|
// Check for and assign an ROB dependency in addition to register
|
|
// dependency before adding the record to the trace.
|
|
// As stores have to commit in order a store is dependent on the last
|
|
// committed load/store. This is recorded in the ROB dependency.
|
|
if (head_inst->isStore()) {
|
|
// Look up store-after-store order dependency
|
|
updateCommitOrderDep(new_record, false);
|
|
// Look up store-after-load order dependency
|
|
updateCommitOrderDep(new_record, true);
|
|
}
|
|
|
|
// In case a node is dependency-free or its dependency got discarded
|
|
// because it was outside the window, it is marked ready in the ROB at the
|
|
// time of issue. A request is sent as soon as possible. To model this, a
|
|
// node is assigned an issue order dependency on a committed instruction
|
|
// that completed earlier than it. This is done to avoid the problem of
|
|
// determining the issue times of such dependency-free nodes during replay
|
|
// which could lead to too much parallelism, thinking conservatively.
|
|
if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
|
|
updateIssueOrderDep(new_record);
|
|
}
|
|
|
|
// Store the record in depTrace.
|
|
depTrace.push_back(new_record);
|
|
DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
|
|
(commit ? "committed" : "squashed"), new_record->instNum);
|
|
|
|
// To process the number of records specified by depWindowSize in the
|
|
// forward direction, the depTrace must have twice as many records
|
|
// to check for dependencies.
|
|
if (depTrace.size() == 2 * depWindowSize) {
|
|
|
|
DPRINTF(ElasticTrace, "Writing out trace...\n");
|
|
|
|
// Write out the records which have been processed to the trace
|
|
// and remove them from the depTrace.
|
|
writeDepTrace(depWindowSize);
|
|
|
|
// After the first window, writeDepTrace() must check for valid
|
|
// compDelay.
|
|
firstWin = false;
|
|
}
|
|
}
|
|
|
|
void
|
|
ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
|
|
bool find_load_not_store)
|
|
{
|
|
assert(new_record->isStore());
|
|
// Iterate in reverse direction to search for the last committed
|
|
// load/store that completed earlier than the new record
|
|
depTraceRevItr from_itr(depTrace.end());
|
|
depTraceRevItr until_itr(depTrace.begin());
|
|
TraceInfo* past_record = *from_itr;
|
|
uint32_t num_go_back = 0;
|
|
|
|
// The execution time of this store is when it is sent, that is committed
|
|
Tick execute_tick = curTick();
|
|
// Search for store-after-load or store-after-store order dependency
|
|
while (num_go_back < depWindowSize && from_itr != until_itr) {
|
|
if (find_load_not_store) {
|
|
// Check if previous inst is a load completed earlier by comparing
|
|
// with execute tick
|
|
if (hasLoadCompleted(past_record, execute_tick)) {
|
|
// Assign rob dependency and calculate the computational delay
|
|
assignRobDep(past_record, new_record);
|
|
++numOrderDepStores;
|
|
return;
|
|
}
|
|
} else {
|
|
// Check if previous inst is a store sent earlier by comparing with
|
|
// execute tick
|
|
if (hasStoreCommitted(past_record, execute_tick)) {
|
|
// Assign rob dependency and calculate the computational delay
|
|
assignRobDep(past_record, new_record);
|
|
++numOrderDepStores;
|
|
return;
|
|
}
|
|
}
|
|
++from_itr;
|
|
past_record = *from_itr;
|
|
++num_go_back;
|
|
}
|
|
}
|
|
|
|
void
|
|
ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
|
|
{
|
|
// Interate in reverse direction to search for the last committed
|
|
// record that completed earlier than the new record
|
|
depTraceRevItr from_itr(depTrace.end());
|
|
depTraceRevItr until_itr(depTrace.begin());
|
|
TraceInfo* past_record = *from_itr;
|
|
|
|
uint32_t num_go_back = 0;
|
|
Tick execute_tick = 0;
|
|
|
|
if (new_record->isLoad()) {
|
|
// The execution time of a load is when a request is sent
|
|
execute_tick = new_record->executeTick;
|
|
++numIssueOrderDepLoads;
|
|
} else if (new_record->isStore()) {
|
|
// The execution time of a store is when it is sent, i.e. committed
|
|
execute_tick = curTick();
|
|
++numIssueOrderDepStores;
|
|
} else {
|
|
// The execution time of a non load/store is when it completes
|
|
execute_tick = new_record->toCommitTick;
|
|
++numIssueOrderDepOther;
|
|
}
|
|
|
|
// We search if this record has an issue order dependency on a past record.
|
|
// Once we find it, we update both the new record and the record it depends
|
|
// on and return.
|
|
while (num_go_back < depWindowSize && from_itr != until_itr) {
|
|
// Check if a previous inst is a load sent earlier, or a store sent
|
|
// earlier, or a comp inst completed earlier by comparing with execute
|
|
// tick
|
|
if (hasLoadBeenSent(past_record, execute_tick) ||
|
|
hasStoreCommitted(past_record, execute_tick) ||
|
|
hasCompCompleted(past_record, execute_tick)) {
|
|
// Assign rob dependency and calculate the computational delay
|
|
assignRobDep(past_record, new_record);
|
|
return;
|
|
}
|
|
++from_itr;
|
|
past_record = *from_itr;
|
|
++num_go_back;
|
|
}
|
|
}
|
|
|
|
void
|
|
ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
|
|
DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
|
|
new_record->typeToStr(), new_record->instNum,
|
|
past_record->instNum);
|
|
// Add dependency on past record
|
|
new_record->robDepList.push_back(past_record->instNum);
|
|
// Update new_record's compute delay with respect to the past record
|
|
compDelayRob(past_record, new_record);
|
|
// Increment number of dependents of the past record
|
|
++(past_record->numDepts);
|
|
// Update stat to log max number of dependents
|
|
maxNumDependents = std::max(past_record->numDepts,
|
|
(uint32_t)maxNumDependents.value());
|
|
}
|
|
|
|
bool
|
|
ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
|
|
Tick execute_tick) const
|
|
{
|
|
return (past_record->isStore() && past_record->commitTick <= execute_tick);
|
|
}
|
|
|
|
bool
|
|
ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
|
|
Tick execute_tick) const
|
|
{
|
|
return(past_record->isLoad() && past_record->commit &&
|
|
past_record->toCommitTick <= execute_tick);
|
|
}
|
|
|
|
bool
|
|
ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
|
|
Tick execute_tick) const
|
|
{
|
|
// Check if previous inst is a load sent earlier than this
|
|
return (past_record->isLoad() && past_record->commit &&
|
|
past_record->executeTick <= execute_tick);
|
|
}
|
|
|
|
bool
|
|
ElasticTrace::hasCompCompleted(TraceInfo* past_record,
|
|
Tick execute_tick) const
|
|
{
|
|
return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst)
|
|
{
|
|
// Clear from temp store starting with the execution info object
|
|
// corresponding the head_inst and continue clearing by decrementing the
|
|
// sequence number until the last cleared sequence number.
|
|
InstSeqNum temp_sn = (head_inst->seqNum);
|
|
while (temp_sn > lastClearedSeqNum) {
|
|
auto itr_exec_info = tempStore.find(temp_sn);
|
|
if (itr_exec_info != tempStore.end()) {
|
|
InstExecInfo* exec_info_ptr = itr_exec_info->second;
|
|
// Free allocated memory for the info object
|
|
delete exec_info_ptr;
|
|
// Remove entry from temporary store
|
|
tempStore.erase(itr_exec_info);
|
|
}
|
|
temp_sn--;
|
|
}
|
|
// Update the last cleared sequence number to that of the head_inst
|
|
lastClearedSeqNum = head_inst->seqNum;
|
|
}
|
|
|
|
void
|
|
ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
|
|
{
|
|
// The computation delay is the delay between the completion tick of the
|
|
// inst. pointed to by past_record and the execution tick of its dependent
|
|
// inst. pointed to by new_record.
|
|
int64_t comp_delay = -1;
|
|
Tick execution_tick = 0, completion_tick = 0;
|
|
|
|
DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
|
|
new_record->instNum, past_record->instNum);
|
|
|
|
// Get the tick when the node is executed as per the modelling of
|
|
// computation delay
|
|
execution_tick = new_record->getExecuteTick();
|
|
|
|
if (past_record->isLoad()) {
|
|
if (new_record->isStore()) {
|
|
completion_tick = past_record->toCommitTick;
|
|
} else {
|
|
completion_tick = past_record->executeTick;
|
|
}
|
|
} else if (past_record->isStore()) {
|
|
completion_tick = past_record->commitTick;
|
|
} else if (past_record->isComp()){
|
|
completion_tick = past_record->toCommitTick;
|
|
}
|
|
assert(execution_tick >= completion_tick);
|
|
comp_delay = execution_tick - completion_tick;
|
|
|
|
DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
|
|
execution_tick, completion_tick, comp_delay);
|
|
|
|
// Assign the computational delay with respect to the dependency which
|
|
// completes the latest.
|
|
if (new_record->compDelay == -1)
|
|
new_record->compDelay = comp_delay;
|
|
else
|
|
new_record->compDelay = std::min(comp_delay, new_record->compDelay);
|
|
DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
|
|
new_record->compDelay);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
|
|
TraceInfo* new_record)
|
|
{
|
|
// The computation delay is the delay between the completion tick of the
|
|
// inst. pointed to by past_record and the execution tick of its dependent
|
|
// inst. pointed to by new_record.
|
|
int64_t comp_delay = -1;
|
|
Tick execution_tick = 0, completion_tick = 0;
|
|
|
|
DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
|
|
" %lli.\n", new_record->instNum, past_record->instNum);
|
|
|
|
// Get the tick when the node is executed as per the modelling of
|
|
// computation delay
|
|
execution_tick = new_record->getExecuteTick();
|
|
|
|
// When there is a physical register dependency on an instruction, the
|
|
// completion tick of that instruction is when it wrote to the register,
|
|
// that is toCommitTick. In case, of a store updating a destination
|
|
// register, this is approximated to commitTick instead
|
|
if (past_record->isStore()) {
|
|
completion_tick = past_record->commitTick;
|
|
} else {
|
|
completion_tick = past_record->toCommitTick;
|
|
}
|
|
assert(execution_tick >= completion_tick);
|
|
comp_delay = execution_tick - completion_tick;
|
|
DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
|
|
execution_tick, completion_tick, comp_delay);
|
|
|
|
// Assign the computational delay with respect to the dependency which
|
|
// completes the latest.
|
|
if (new_record->compDelay == -1)
|
|
new_record->compDelay = comp_delay;
|
|
else
|
|
new_record->compDelay = std::min(comp_delay, new_record->compDelay);
|
|
DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
|
|
new_record->compDelay);
|
|
}
|
|
|
|
// Return the tick used as this record's execution tick when computing
// computational delays; which tick is used depends on the record type.
Tick
ElasticTrace::TraceInfo::getExecuteTick() const
{
    // A load executes when its request was sent, that is executeTick.
    if (isLoad())
        return executeTick;

    // A store executes when its request was sent, that is commitTick.
    if (isStore())
        return commitTick;

    // Any other instruction executes when the register value was written,
    // that is toCommitTick.
    return toCommitTick;
}
|
|
|
|
void
|
|
ElasticTrace::writeDepTrace(uint32_t num_to_write)
|
|
{
|
|
// Write the trace with fields as follows:
|
|
// Instruction sequence number
|
|
// If instruction was a load
|
|
// If instruction was a store
|
|
// If instruction has addr
|
|
// If instruction has size
|
|
// If instruction has flags
|
|
// List of order dependencies - optional, repeated
|
|
// Computational delay with respect to last completed dependency
|
|
// List of physical register RAW dependencies - optional, repeated
|
|
// Weight of a node equal to no. of filtered nodes before it - optional
|
|
uint16_t num_filtered_nodes = 0;
|
|
depTraceItr dep_trace_itr(depTrace.begin());
|
|
depTraceItr dep_trace_itr_start = dep_trace_itr;
|
|
while (num_to_write > 0) {
|
|
TraceInfo* temp_ptr = *dep_trace_itr;
|
|
assert(temp_ptr->type != Record::INVALID);
|
|
// If no node dependends on a comp node then there is no reason to
|
|
// track the comp node in the dependency graph. We filter out such
|
|
// nodes but count them and add a weight field to the subsequent node
|
|
// that we do include in the trace.
|
|
if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
|
|
DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
|
|
"is as follows:\n", temp_ptr->instNum);
|
|
if (temp_ptr->isLoad() || temp_ptr->isStore()) {
|
|
DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
|
|
DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
|
|
"size %i, flags %i\n", temp_ptr->physAddr,
|
|
temp_ptr->size, temp_ptr->reqFlags);
|
|
} else {
|
|
DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
|
|
}
|
|
if (firstWin && temp_ptr->compDelay == -1) {
|
|
if (temp_ptr->isLoad()) {
|
|
temp_ptr->compDelay = temp_ptr->executeTick;
|
|
} else if (temp_ptr->isStore()) {
|
|
temp_ptr->compDelay = temp_ptr->commitTick;
|
|
} else {
|
|
temp_ptr->compDelay = temp_ptr->toCommitTick;
|
|
}
|
|
}
|
|
assert(temp_ptr->compDelay != -1);
|
|
DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
|
|
temp_ptr->compDelay);
|
|
|
|
// Create a protobuf message for the dependency record
|
|
ProtoMessage::InstDepRecord dep_pkt;
|
|
dep_pkt.set_seq_num(temp_ptr->instNum);
|
|
dep_pkt.set_type(temp_ptr->type);
|
|
dep_pkt.set_pc(temp_ptr->pc);
|
|
if (temp_ptr->isLoad() || temp_ptr->isStore()) {
|
|
dep_pkt.set_flags(temp_ptr->reqFlags);
|
|
dep_pkt.set_p_addr(temp_ptr->physAddr);
|
|
// If tracing of virtual addresses is enabled, set the optional
|
|
// field for it
|
|
if (traceVirtAddr) {
|
|
dep_pkt.set_v_addr(temp_ptr->virtAddr);
|
|
dep_pkt.set_asid(temp_ptr->asid);
|
|
}
|
|
dep_pkt.set_size(temp_ptr->size);
|
|
}
|
|
dep_pkt.set_comp_delay(temp_ptr->compDelay);
|
|
if (temp_ptr->robDepList.empty()) {
|
|
DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
|
|
}
|
|
while (!temp_ptr->robDepList.empty()) {
|
|
DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
|
|
temp_ptr->robDepList.front());
|
|
dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
|
|
temp_ptr->robDepList.pop_front();
|
|
}
|
|
if (temp_ptr->physRegDepList.empty()) {
|
|
DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
|
|
}
|
|
while (!temp_ptr->physRegDepList.empty()) {
|
|
DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
|
|
temp_ptr->physRegDepList.front());
|
|
dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
|
|
temp_ptr->physRegDepList.pop_front();
|
|
}
|
|
if (num_filtered_nodes != 0) {
|
|
// Set the weight of this node as the no. of filtered nodes
|
|
// between this node and the last node that we wrote to output
|
|
// stream. The weight will be used during replay to model ROB
|
|
// occupancy of filtered nodes.
|
|
dep_pkt.set_weight(num_filtered_nodes);
|
|
num_filtered_nodes = 0;
|
|
}
|
|
// Write the message to the protobuf output stream
|
|
dataTraceStream->write(dep_pkt);
|
|
} else {
|
|
// Don't write the node to the trace but note that we have filtered
|
|
// out a node.
|
|
++numFilteredNodes;
|
|
++num_filtered_nodes;
|
|
}
|
|
dep_trace_itr++;
|
|
traceInfoMap.erase(temp_ptr->instNum);
|
|
delete temp_ptr;
|
|
num_to_write--;
|
|
}
|
|
depTrace.erase(dep_trace_itr_start, dep_trace_itr);
|
|
}
|
|
|
|
void
|
|
ElasticTrace::regStats() {
|
|
using namespace Stats;
|
|
numRegDep
|
|
.name(name() + ".numRegDep")
|
|
.desc("Number of register dependencies recorded during tracing")
|
|
;
|
|
|
|
numOrderDepStores
|
|
.name(name() + ".numOrderDepStores")
|
|
.desc("Number of commit order (rob) dependencies for a store recorded"
|
|
" on a past load/store during tracing")
|
|
;
|
|
|
|
numIssueOrderDepLoads
|
|
.name(name() + ".numIssueOrderDepLoads")
|
|
.desc("Number of loads that got assigned issue order dependency"
|
|
" because they were dependency-free")
|
|
;
|
|
|
|
numIssueOrderDepStores
|
|
.name(name() + ".numIssueOrderDepStores")
|
|
.desc("Number of stores that got assigned issue order dependency"
|
|
" because they were dependency-free")
|
|
;
|
|
|
|
numIssueOrderDepOther
|
|
.name(name() + ".numIssueOrderDepOther")
|
|
.desc("Number of non load/store insts that got assigned issue order"
|
|
" dependency because they were dependency-free")
|
|
;
|
|
|
|
numFilteredNodes
|
|
.name(name() + ".numFilteredNodes")
|
|
.desc("No. of nodes filtered out before writing the output trace")
|
|
;
|
|
|
|
maxNumDependents
|
|
.name(name() + ".maxNumDependents")
|
|
.desc("Maximum number or dependents on any instruction")
|
|
;
|
|
|
|
maxTempStoreSize
|
|
.name(name() + ".maxTempStoreSize")
|
|
.desc("Maximum size of the temporary store during the run")
|
|
;
|
|
|
|
maxPhysRegDepMapSize
|
|
.name(name() + ".maxPhysRegDepMapSize")
|
|
.desc("Maximum size of register dependency map")
|
|
;
|
|
}
|
|
|
|
const std::string&
|
|
ElasticTrace::TraceInfo::typeToStr() const
|
|
{
|
|
return Record::RecordType_Name(type);
|
|
}
|
|
|
|
const std::string
|
|
ElasticTrace::name() const
|
|
{
|
|
return ProbeListenerObject::name();
|
|
}
|
|
|
|
void
|
|
ElasticTrace::flushTraces()
|
|
{
|
|
// Write to trace all records in the depTrace.
|
|
writeDepTrace(depTrace.size());
|
|
// Delete the stream objects
|
|
delete dataTraceStream;
|
|
delete instTraceStream;
|
|
}
|
|
|
|
// Create a new ElasticTrace probe from this parameter object. The probe is
// allocated with new and returned as a raw pointer.
ElasticTrace*
ElasticTraceParams::create()
{
    ElasticTrace* const probe = new ElasticTrace(this);
    return probe;
}
|