54519fd51f
This patch adds support to optionally capture the virtual address and asid for load/store instructions in the elastic traces. If they are present in the traces, Trace CPU will set those fields of the request during replay.
563 lines
22 KiB
C++
563 lines
22 KiB
C++
/*
|
|
* Copyright (c) 2013 - 2015 ARM Limited
|
|
* All rights reserved
|
|
*
|
|
* The license below extends only to copyright in the software and shall
|
|
* not be construed as granting a license to any other intellectual
|
|
* property including but not limited to intellectual property relating
|
|
* to a hardware implementation of the functionality of the software
|
|
* licensed hereunder. You may use the software subject to the license
|
|
* terms below provided that you ensure that this notice is replicated
|
|
* unmodified and in its entirety in all distributions of the software,
|
|
* modified or unmodified, in source code or in binary form.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* Authors: Radhika Jagtap
|
|
* Andreas Hansson
|
|
* Thomas Grass
|
|
*/
|
|
|
|
/**
|
|
* @file This file describes a trace component which is a cpu probe listener
|
|
* used to generate elastic cpu traces. It registers listeners to probe points
|
|
* in the fetch, rename, iew and commit stages of the O3CPU. It processes the
|
|
* dependency graph of the cpu execution and writes out a protobuf trace. It
|
|
* also generates a protobuf trace of the instruction fetch requests.
|
|
*/
|
|
|
|
#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__
|
|
#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__
|
|
|
|
#include <set>
|
|
#include <unordered_map>
|
|
#include <utility>
|
|
|
|
#include "cpu/o3/dyn_inst.hh"
|
|
#include "cpu/o3/impl.hh"
|
|
#include "mem/request.hh"
|
|
#include "params/ElasticTrace.hh"
|
|
#include "proto/inst_dep_record.pb.h"
|
|
#include "proto/packet.pb.h"
|
|
#include "proto/protoio.hh"
|
|
#include "sim/eventq.hh"
|
|
#include "sim/probe/probe.hh"
|
|
|
|
/**
|
|
* The elastic trace is a type of probe listener and listens to probe points
|
|
* in multiple stages of the O3CPU. The notify method is called on a probe
|
|
* point typically when an instruction successfully progresses through that
|
|
* stage.
|
|
*
|
|
* As different listener methods mapped to the different probe points execute,
|
|
* relevant information about the instruction, e.g. timestamps and register
|
|
* accesses, are captured and stored in temporary data structures. When the
|
|
* instruction progresses through the commit stage, the timing as well as
|
|
* dependency information about the instruction is finalised and encapsulated in
|
|
* a struct called TraceInfo. TraceInfo objects are collected in a list instead
|
|
* of writing them out to the trace file one a time. This is required as the
|
|
* trace is processed in chunks to evaluate order dependencies and computational
|
|
* delay in case an instruction does not have any register dependencies. By this
|
|
* we achieve a simpler algorithm during replay because every record in the
|
|
* trace can be hooked onto a record in its past. The trace is written out as
|
|
* a protobuf format output file.
|
|
*
|
|
* The output trace can be read in and played back by the TraceCPU.
|
|
*/
|
|
class ElasticTrace : public ProbeListenerObject
|
|
{
|
|
|
|
public:
|
|
typedef typename O3CPUImpl::DynInstPtr DynInstPtr;
|
|
typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair;
|
|
|
|
/** Trace record types corresponding to instruction node types */
|
|
typedef ProtoMessage::InstDepRecord::RecordType RecordType;
|
|
typedef ProtoMessage::InstDepRecord Record;
|
|
|
|
/** Constructor */
|
|
ElasticTrace(const ElasticTraceParams *params);
|
|
|
|
/**
|
|
* Register the probe listeners that is the methods called on a probe point
|
|
* notify() call.
|
|
*/
|
|
void regProbeListeners();
|
|
|
|
/** Register all listeners. */
|
|
void regEtraceListeners();
|
|
|
|
/** Returns the name of the trace probe listener. */
|
|
const std::string name() const;
|
|
|
|
/**
|
|
* Process any outstanding trace records, flush them out to the protobuf
|
|
* output streams and delete the streams at simulation exit.
|
|
*/
|
|
void flushTraces();
|
|
|
|
/**
|
|
* Take the fields of the request class object that are relevant to create
|
|
* an instruction fetch request. It creates a protobuf message containing
|
|
* the request fields and writes it to instTraceStream.
|
|
*
|
|
* @param req pointer to the fetch request
|
|
*/
|
|
void fetchReqTrace(const RequestPtr &req);
|
|
|
|
/**
|
|
* Populate the execute timestamp field in an InstExecInfo object for an
|
|
* instruction in flight.
|
|
*
|
|
* @param dyn_inst pointer to dynamic instruction in flight
|
|
*/
|
|
void recordExecTick(const DynInstPtr &dyn_inst);
|
|
|
|
/**
|
|
* Populate the timestamp field in an InstExecInfo object for an
|
|
* instruction in flight when it is execution is complete and it is ready
|
|
* to commit.
|
|
*
|
|
* @param dyn_inst pointer to dynamic instruction in flight
|
|
*/
|
|
void recordToCommTick(const DynInstPtr &dyn_inst);
|
|
|
|
/**
|
|
* Record a Read After Write physical register dependency if there has
|
|
* been a write to the source register and update the physical register
|
|
* map. For this look up the physRegDepMap with this instruction as the
|
|
* writer of its destination register. If the dependency falls outside the
|
|
* window it is assumed as already complete. Duplicate entries are avoided.
|
|
*
|
|
* @param dyn_inst pointer to dynamic instruction in flight
|
|
*/
|
|
void updateRegDep(const DynInstPtr &dyn_inst);
|
|
|
|
/**
|
|
* When an instruction gets squashed the destination register mapped to it
|
|
* is freed up in the rename stage. Remove the register entry from the
|
|
* physRegDepMap as well to avoid dependencies on squashed instructions.
|
|
*
|
|
* @param inst_reg_pair pair of inst. sequence no. and the register
|
|
*/
|
|
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair);
|
|
|
|
/**
|
|
* Add an instruction that is at the head of the ROB and is squashed only
|
|
* if it is a load and a request was sent for it.
|
|
*
|
|
* @param head_inst pointer to dynamic instruction to be squashed
|
|
*/
|
|
void addSquashedInst(const DynInstPtr &head_inst);
|
|
|
|
/**
|
|
* Add an instruction that is at the head of the ROB and is committed.
|
|
*
|
|
* @param head_inst pointer to dynamic instruction to be committed
|
|
*/
|
|
void addCommittedInst(const DynInstPtr &head_inst);
|
|
|
|
/** Register statistics for the elastic trace. */
|
|
void regStats();
|
|
|
|
/** Event to trigger registering this listener for all probe points. */
|
|
EventWrapper<ElasticTrace,
|
|
&ElasticTrace::regEtraceListeners> regEtraceListenersEvent;
|
|
|
|
private:
|
|
/**
|
|
* Used for checking the first window for processing and writing of
|
|
* dependency trace. At the start of the program there can be dependency-
|
|
* free instructions and such cases are handled differently.
|
|
*/
|
|
bool firstWin;
|
|
|
|
/**
|
|
* @defgroup InstExecInfo Struct for storing information before an
|
|
* instruction reaches the commit stage, e.g. execute timestamp.
|
|
*/
|
|
struct InstExecInfo
|
|
{
|
|
/**
|
|
* @ingroup InstExecInfo
|
|
* @{
|
|
*/
|
|
/** Timestamp when instruction was first processed by execute stage */
|
|
Tick executeTick;
|
|
/**
|
|
* Timestamp when instruction execution is completed in execute stage
|
|
* and instruction is marked as ready to commit
|
|
*/
|
|
Tick toCommitTick;
|
|
/**
|
|
* Set of instruction sequence numbers that this instruction depends on
|
|
* due to Read After Write data dependency based on physical register.
|
|
*/
|
|
std::set<InstSeqNum> physRegDepSet;
|
|
/** @} */
|
|
|
|
/** Constructor */
|
|
InstExecInfo()
|
|
: executeTick(MaxTick),
|
|
toCommitTick(MaxTick)
|
|
{ }
|
|
};
|
|
|
|
/**
|
|
* Temporary store of InstExecInfo objects. Later on when an instruction
|
|
* is processed for commit or retire, if it is chosen to be written to
|
|
* the output trace then this information is looked up using the instruction
|
|
* sequence number as the key. If it is not chosen then the entry for it in
|
|
* the store is cleared.
|
|
*/
|
|
std::unordered_map<InstSeqNum, InstExecInfo*> tempStore;
|
|
|
|
/**
|
|
* The last cleared instruction sequence number used to free up the memory
|
|
* allocated in the temporary store.
|
|
*/
|
|
InstSeqNum lastClearedSeqNum;
|
|
|
|
/**
|
|
* Map for recording the producer of a physical register to check Read
|
|
* After Write dependencies. The key is the renamed physical register and
|
|
* the value is the instruction sequence number of its last producer.
|
|
*/
|
|
std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap;
|
|
|
|
/**
|
|
* @defgroup TraceInfo Struct for a record in the instruction dependency
|
|
* trace. All information required to process and calculate the
|
|
* computational delay is stored in TraceInfo objects. The memory request
|
|
* fields for a load or store instruction are also included here. Note
|
|
* that the structure TraceInfo does not store pointers to children
|
|
* or parents. The dependency trace is maintained as an ordered collection
|
|
* of records for writing to the output trace and not as a tree data
|
|
* structure.
|
|
*/
|
|
struct TraceInfo
|
|
{
|
|
/**
|
|
* @ingroup TraceInfo
|
|
* @{
|
|
*/
|
|
/* Instruction sequence number. */
|
|
InstSeqNum instNum;
|
|
/** The type of trace record for the instruction node */
|
|
RecordType type;
|
|
/* Tick when instruction was in execute stage. */
|
|
Tick executeTick;
|
|
/* Tick when instruction was marked ready and sent to commit stage. */
|
|
Tick toCommitTick;
|
|
/* Tick when instruction was committed. */
|
|
Tick commitTick;
|
|
/* If instruction was committed, as against squashed. */
|
|
bool commit;
|
|
/* List of order dependencies. */
|
|
std::list<InstSeqNum> robDepList;
|
|
/* List of physical register RAW dependencies. */
|
|
std::list<InstSeqNum> physRegDepList;
|
|
/**
|
|
* Computational delay after the last dependent inst. completed.
|
|
* A value of -1 which means instruction has no dependencies.
|
|
*/
|
|
int64_t compDelay;
|
|
/* Number of dependents. */
|
|
uint32_t numDepts;
|
|
/* The instruction PC for a load, store or non load/store. */
|
|
Addr pc;
|
|
/* Request flags in case of a load/store instruction */
|
|
Request::FlagsType reqFlags;
|
|
/* Request physical address in case of a load/store instruction */
|
|
Addr physAddr;
|
|
/* Request virtual address in case of a load/store instruction */
|
|
Addr virtAddr;
|
|
/* Address space id in case of a load/store instruction */
|
|
uint32_t asid;
|
|
/* Request size in case of a load/store instruction */
|
|
unsigned size;
|
|
/** Default Constructor */
|
|
TraceInfo()
|
|
: type(Record::INVALID)
|
|
{ }
|
|
/** Is the record a load */
|
|
bool isLoad() const { return (type == Record::LOAD); }
|
|
/** Is the record a store */
|
|
bool isStore() const { return (type == Record::STORE); }
|
|
/** Is the record a fetch triggering an Icache request */
|
|
bool isComp() const { return (type == Record::COMP); }
|
|
/** Return string specifying the type of the node */
|
|
const std::string& typeToStr() const;
|
|
/** @} */
|
|
|
|
/**
|
|
* Get the execute tick of the instruction.
|
|
*
|
|
* @return Tick when instruction was executed
|
|
*/
|
|
Tick getExecuteTick() const;
|
|
};
|
|
|
|
/**
|
|
* The instruction dependency trace containing TraceInfo objects. The
|
|
* container implemented is sequential as dependencies obey commit
|
|
* order (program order). For example, if B is dependent on A then B must
|
|
* be committed after A. Thus records are updated with dependency
|
|
* information and written to the trace in commit order. This ensures that
|
|
* when a graph is reconstructed from the trace during replay, all the
|
|
* dependencies are stored in the graph before the dependent itself is
|
|
* added. This facilitates creating a tree data structure during replay,
|
|
* i.e. adding children as records are read from the trace in an efficient
|
|
* manner.
|
|
*/
|
|
std::vector<TraceInfo*> depTrace;
|
|
|
|
/**
|
|
* Map where the instruction sequence number is mapped to the pointer to
|
|
* the TraceInfo object.
|
|
*/
|
|
std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap;
|
|
|
|
/** Typedef of iterator to the instruction dependency trace. */
|
|
typedef typename std::vector<TraceInfo*>::iterator depTraceItr;
|
|
|
|
/** Typedef of the reverse iterator to the instruction dependency trace. */
|
|
typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr;
|
|
|
|
/**
|
|
* The maximum distance for a dependency and is set by a top level
|
|
* level parameter. It must be equal to or greater than the number of
|
|
* entries in the ROB. This variable is used as the length of the sliding
|
|
* window for processing the dependency trace.
|
|
*/
|
|
uint32_t depWindowSize;
|
|
|
|
/** Protobuf output stream for data dependency trace */
|
|
ProtoOutputStream* dataTraceStream;
|
|
|
|
/** Protobuf output stream for instruction fetch trace. */
|
|
ProtoOutputStream* instTraceStream;
|
|
|
|
/** Number of instructions after which to enable tracing. */
|
|
const InstSeqNum startTraceInst;
|
|
|
|
/**
|
|
* Whther the elastic trace listener has been registered for all probes.
|
|
*
|
|
* When enabling tracing after a specified number of instructions have
|
|
* committed, check this to prevent re-registering the listener.
|
|
*/
|
|
bool allProbesReg;
|
|
|
|
/** Whether to trace virtual addresses for memory requests. */
|
|
const bool traceVirtAddr;
|
|
|
|
/** Pointer to the O3CPU that is this listener's parent a.k.a. manager */
|
|
FullO3CPU<O3CPUImpl>* cpu;
|
|
|
|
/**
|
|
* Add a record to the dependency trace depTrace which is a sequential
|
|
* container. A record is inserted per committed instruction and in the same
|
|
* order as the order in which instructions are committed.
|
|
*
|
|
* @param head_inst Pointer to the instruction which is head of the
|
|
* ROB and ready to commit
|
|
* @param exec_info_ptr Pointer to InstExecInfo for that instruction
|
|
* @param commit True if instruction is committed, false if squashed
|
|
*/
|
|
void addDepTraceRecord(const DynInstPtr &head_inst,
|
|
InstExecInfo* exec_info_ptr, bool commit);
|
|
|
|
/**
|
|
* Clear entries in the temporary store of execution info objects to free
|
|
* allocated memory until the present instruction being added to the trace.
|
|
*
|
|
* @param head_inst pointer to dynamic instruction
|
|
*/
|
|
void clearTempStoreUntil(const DynInstPtr head_inst);
|
|
|
|
/**
|
|
* Calculate the computational delay between an instruction and a
|
|
* subsequent instruction that has an ROB (order) dependency on it
|
|
*
|
|
* @param past_record Pointer to instruction
|
|
*
|
|
* @param new_record Pointer to subsequent instruction having an ROB
|
|
* dependency on the instruction pointed to by
|
|
* past_record
|
|
*/
|
|
void compDelayRob(TraceInfo* past_record, TraceInfo* new_record);
|
|
|
|
/**
|
|
* Calculate the computational delay between an instruction and a
|
|
* subsequent instruction that has a Physical Register (data) dependency on
|
|
* it.
|
|
*
|
|
* @param past_record Pointer to instruction
|
|
*
|
|
* @param new_record Pointer to subsequent instruction having a Physical
|
|
* Register dependency on the instruction pointed to
|
|
* by past_record
|
|
*/
|
|
void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record);
|
|
|
|
/**
|
|
* Write out given number of records to the trace starting with the first
|
|
* record in depTrace and iterating through the trace in sequence. A
|
|
* record is deleted after it is written.
|
|
*
|
|
* @param num_to_write Number of records to write to the trace
|
|
*/
|
|
void writeDepTrace(uint32_t num_to_write);
|
|
|
|
/**
|
|
* Reverse iterate through the graph, search for a store-after-store or
|
|
* store-after-load dependency and update the new node's Rob dependency list.
|
|
*
|
|
* If a dependency is found, then call the assignRobDep() method that
|
|
* updates the store with the dependency information. This function is only
|
|
* called when a new store node is added to the trace.
|
|
*
|
|
* @param new_record pointer to new store record
|
|
* @param find_load_not_store true for searching store-after-load and false
|
|
* for searching store-after-store dependency
|
|
*/
|
|
void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store);
|
|
|
|
/**
|
|
* Reverse iterate through the graph, search for an issue order dependency
|
|
* for a new node and update the new node's Rob dependency list.
|
|
*
|
|
* If a dependency is found, call the assignRobDep() method that updates
|
|
* the node with its dependency information. This function is called in
|
|
* case a new node to be added to the trace is dependency-free or its
|
|
* dependency got discarded because the dependency was outside the window.
|
|
*
|
|
* @param new_record pointer to new record to be added to the trace
|
|
*/
|
|
void updateIssueOrderDep(TraceInfo* new_record);
|
|
|
|
/**
|
|
* The new_record has an order dependency on a past_record, thus update the
|
|
* new record's Rob dependency list and increment the number of dependents
|
|
* of the past record.
|
|
*
|
|
* @param new_record pointer to new record
|
|
* @param past_record pointer to record that new_record has a rob
|
|
* dependency on
|
|
*/
|
|
void assignRobDep(TraceInfo* past_record, TraceInfo* new_record);
|
|
|
|
/**
|
|
* Check if past record is a store sent earlier than the execute tick.
|
|
*
|
|
* @param past_record pointer to past store
|
|
* @param execute_tick tick with which to compare past store's commit tick
|
|
*
|
|
* @return true if past record is store sent earlier
|
|
*/
|
|
bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const;
|
|
|
|
/**
|
|
* Check if past record is a load that completed earlier than the execute
|
|
* tick.
|
|
*
|
|
* @param past_record pointer to past load
|
|
* @param execute_tick tick with which to compare past load's complete
|
|
* tick
|
|
*
|
|
* @return true if past record is load completed earlier
|
|
*/
|
|
bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const;
|
|
|
|
/**
|
|
* Check if past record is a load sent earlier than the execute tick.
|
|
*
|
|
* @param past_record pointer to past load
|
|
* @param execute_tick tick with which to compare past load's send tick
|
|
*
|
|
* @return true if past record is load sent earlier
|
|
*/
|
|
bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const;
|
|
|
|
/**
|
|
* Check if past record is a comp node that completed earlier than the
|
|
* execute tick.
|
|
*
|
|
* @param past_record pointer to past comp node
|
|
* @param execute_tick tick with which to compare past comp node's
|
|
* completion tick
|
|
*
|
|
* @return true if past record is comp completed earlier
|
|
*/
|
|
bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const;
|
|
|
|
/** Number of register dependencies recorded during tracing */
|
|
Stats::Scalar numRegDep;
|
|
|
|
/**
|
|
* Number of stores that got assigned a commit order dependency
|
|
* on a past load/store.
|
|
*/
|
|
Stats::Scalar numOrderDepStores;
|
|
|
|
/**
|
|
* Number of load insts that got assigned an issue order dependency
|
|
* because they were dependency-free.
|
|
*/
|
|
Stats::Scalar numIssueOrderDepLoads;
|
|
|
|
/**
|
|
* Number of store insts that got assigned an issue order dependency
|
|
* because they were dependency-free.
|
|
*/
|
|
Stats::Scalar numIssueOrderDepStores;
|
|
|
|
/**
|
|
* Number of non load/store insts that got assigned an issue order
|
|
* dependency because they were dependency-free.
|
|
*/
|
|
Stats::Scalar numIssueOrderDepOther;
|
|
|
|
/** Number of filtered nodes */
|
|
Stats::Scalar numFilteredNodes;
|
|
|
|
/** Maximum number of dependents on any instruction */
|
|
Stats::Scalar maxNumDependents;
|
|
|
|
/**
|
|
* Maximum size of the temporary store mostly useful as a check that it is
|
|
* not growing
|
|
*/
|
|
Stats::Scalar maxTempStoreSize;
|
|
|
|
/**
|
|
* Maximum size of the map that holds the last writer to a physical
|
|
* register.
|
|
* */
|
|
Stats::Scalar maxPhysRegDepMapSize;
|
|
|
|
};
|
|
#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__
|