12eb034378
Make best use of the compiler, and enable -Wextra as well as -Wall. There are a few issues that had to be resolved, but they are all trivial.
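For context, -Wextra mostly flags small issues such as signed/unsigned comparisons and unused parameters. A hypothetical illustration of the kind of trivial fix involved (not taken from this change):

    #include <cstddef>
    #include <vector>

    int count(const std::vector<int>& v)
    {
        int n = 0;
        // Loop index was previously 'int i'; -Wextra/-Wsign-compare warns
        // about comparing it against the unsigned v.size().
        for (std::size_t i = 0; i < v.size(); ++i)
            ++n;
        return n;
    }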
1477 lines
50 KiB
C++
/*
 * Copyright (c) 2013 - 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Radhika Jagtap
 *          Andreas Hansson
 *          Thomas Grass
 */

#include "cpu/trace/trace_cpu.hh"

#include "sim/sim_exit.hh"

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(TraceCPUParams *params)
    :   BaseCPU(params),
        icachePort(this),
        dcachePort(this),
        instMasterID(params->system->getMasterId(name() + ".inst")),
        dataMasterID(params->system->getMasterId(name() + ".data")),
        instTraceFile(params->instTraceFile),
        dataTraceFile(params->dataTraceFile),
        icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
        dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
                  params->sizeROB, params->sizeStoreBuffer,
                  params->sizeLoadBuffer),
        icacheNextEvent(this),
        dcacheNextEvent(this),
        oneTraceComplete(false),
        firstFetchTick(0),
        execCompleteEvent(nullptr)
{
    // Increment static counter for number of Trace CPUs.
    ++TraceCPU::numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
             "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set "
             "to %d exceeds the max. value of %d.\n",
             params->sizeStoreBuffer, UINT16_MAX);
    fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
             " %d exceeds the max. value of %d.\n",
             params->sizeLoadBuffer, UINT16_MAX);
}

TraceCPU::~TraceCPU()
{

}

TraceCPU*
TraceCPUParams::create()
{
    return new TraceCPU(this);
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    assert(!getInstPort().isConnected());
    assert(oldCPU->getInstPort().isConnected());
    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
    oldCPU->getInstPort().unbind();
    getInstPort().bind(inst_peer_port);

    assert(!getDataPort().isConnected());
    assert(oldCPU->getDataPort().isConnected());
    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
    oldCPU->getDataPort().unbind();
    getDataPort().bind(data_peer_port);
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
            "\n", instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request and schedule
    // icacheNextEvent at that tick.
    Tick first_icache_tick = icacheGen.init();
    schedule(icacheNextEvent, first_icache_tick);

    // Get the send tick of the first data read/write request and schedule
    // dcacheNextEvent at that tick.
    Tick first_dcache_tick = dcacheGen.init();
    schedule(dcacheNextEvent, first_dcache_tick);

    // The static counter for number of Trace CPUs is correctly set at this
    // point so create an event and pass it.
    execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                             numTraceCPUs);
    // Save the first fetch request tick to dump it as tickOffset
    firstFetchTick = first_icache_tick;
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
                "at %d.\n", curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++numSchedIcacheEvent;
    } else {
        // Check if the trace is complete. If not, do nothing because sending
        // failed and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If it
            // is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.\n", name());

        // Record stats which are computed at the end of simulation
        tickOffset = firstFetchTick;
        numCycles = (clockEdge() - firstFetchTick) / clockPeriod();
        numOps = dcacheGen.getMicroOpCount();
        schedule(*execCompleteEvent, curTick());
    }
}
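
// Note on the derived stats above (illustrative numbers, not from a real
// run): numCycles counts clock cycles between the first fetch tick and the
// tick at which both traces finish. With a 500 ps clock period,
// firstFetchTick = 1000 and clockEdge() = 501000 give
// numCycles = (501000 - 1000) / 500 = 1000 cycles; regStats() below then
// defines cpi as numCycles / numOps, so 1000 cycles over 800 micro-ops
// would report cpi = 1.25.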

void
TraceCPU::regStats()
{

    BaseCPU::regStats();

    numSchedDcacheEvent
    .name(name() + ".numSchedDcacheEvent")
    .desc("Number of events scheduled to trigger data request generator")
    ;

    numSchedIcacheEvent
    .name(name() + ".numSchedIcacheEvent")
    .desc("Number of events scheduled to trigger instruction request generator")
    ;

    numOps
    .name(name() + ".numOps")
    .desc("Number of micro-ops simulated by the Trace CPU")
    ;

    cpi
    .name(name() + ".cpi")
    .desc("Cycles per micro-op used as a proxy for CPI")
    .precision(6)
    ;
    cpi = numCycles / numOps;

    tickOffset
    .name(name() + ".tickOffset")
    .desc("The first execution tick for the root node of elastic traces")
    ;

    icacheGen.regStats();
    dcacheGen.regStats();
}

void
TraceCPU::ElasticDataGen::regStats()
{
    using namespace Stats;

    maxDependents
    .name(name() + ".maxDependents")
    .desc("Max number of dependents observed on a node")
    ;

    maxReadyListSize
    .name(name() + ".maxReadyListSize")
    .desc("Max size of the ready list observed")
    ;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    numSplitReqs
    .name(name() + ".numSplitReqs")
    .desc("Number of split requests")
    ;

    numSOLoads
    .name(name() + ".numSOLoads")
    .desc("Number of strictly ordered loads")
    ;

    numSOStores
    .name(name() + ".numSOStores")
    .desc("Number of strictly ordered stores")
    ;

    dataLastTick
    .name(name() + ".dataLastTick")
    .desc("Last tick simulated from the elastic data trace")
    ;
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    if (!readNextWindow())
        panic("Trace has %d elements. It must have at least %d elements.\n",
              depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
            " is %d.\n", free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{

    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then the
        // end of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            // The node was never added to the graph, so free it here.
            delete new_node;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent nodes.
        addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T> void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,
                                          T& dep_array, uint8_t& num_dep)
{
    for (auto& a_dep : dep_array) {
        // The convention is to set the dependencies starting with the first
        // index in the ROB and register dependency arrays. Thus, when we reach
        // a dependency equal to the initialisation value of zero, we know we
        // have iterated over all dependencies and can break.
        if (a_dep == 0)
            break;
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(a_dep);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            maxDependents = std::max<double>(num_depts, maxDependents.value());
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            a_dep = 0;
            num_dep--;
        }
    }
}
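
// Illustrative example of the convention used above (values are made up):
// a node with robDep = {5, 12, 0, ...} and regDep = {9, 0, ...} depends on
// the nodes with sequence numbers 5 and 12 via the ROB and on node 9 via a
// register; the first zero entry terminates each list. If node 12 has
// already left the depGraph, addDepsOnParent() clears that entry and
// decrements numRobDep, leaving only the still-pending parents recorded.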

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
                    "%lli.\n", (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
                    "dependents..\n", node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after load is sent but after response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->numRobDep == 0 &&
                        (*child_itr)->numRegDep == 0) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as complete
            // as we do not send a request for this case. If it is a store or a
            // comp node we also mark all its dependents complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (DTRACE(TraceCPUData)) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
{

    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++numSplitReqs;
    }

    // Create a request and the packet containing request
    Request* req = new Request(node_ptr->physAddr, node_ptr->size,
                               node_ptr->flags, masterID, node_ptr->seqNum,
                               ContextID(0), ThreadID(0));
    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the asid and virtual address fields
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, masterID, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++numSendSucceeded;
        return nullptr;
    }
}
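
// Worked example for the split-request handling in executeMemReq() above
// (assuming a 64-byte cache line): a request at physAddr 0x103C of size 8
// has blk_offset = 0x3C = 60, and 60 + 8 > 64, so the size is truncated to
// 64 - 60 = 4 bytes, numSplitReqs is incremented, and only the first line
// is accessed.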

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is "
                 "now dependency free.\n", node_ptr->seqNum,
                 node_ptr->typeToStr(), node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;

    } else {
        if (first) {
            // Although dependencies are complete, resources are not available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to schedule
        // an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (DTRACE(TraceCPUData)) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        maxReadyListSize = std::max<double>(readyList.size(),
                                            maxReadyListSize.value());
        return;
    }

    // If the new node has its execution tick equal to the first node in the
    // list then go to the next node. If the first node in the list failed
    // to execute, its position as the first is thus maintained.
    if (retryPkt)
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick)
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum)
                found = true;
            // Else go to next node
            else
                itr++;
        }
        // If the execution tick of the new node is greater than the node then
        // go to the next node
        else
            itr++;
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    maxReadyListSize = std::max<double>(readyList.size(),
                                        maxReadyListSize.value());
}
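
// Example of the resulting order (hypothetical values): inserting nodes
// with (seqNum, execTick) = (7, 1000), (3, 1000) and (9, 500) one at a
// time yields readyList = [(9, 500), (3, 1000), (7, 1000)] -- earlier
// execute ticks first, ties broken by ascending sequence number -- except
// that a node whose packet is awaiting a retry stays pinned at the head.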

void
TraceCPU::ElasticDataGen::printReadyList() {

    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
    : sizeROB(max_rob),
      sizeStoreBuffer(max_stores),
      sizeLoadBuffer(max_loads),
      oldestInFlightRobNum(UINT64_MAX),
      numInFlightLoads(0),
      numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. numerically least robNum by
    // saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
             done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we delete the only in-flight node then the oldestInFlightRobNum
        // is set back to its initialised (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus ROB entry is
    // freed. But it occupies an entry in the Store Buffer until its response
    // is received. A load is considered complete when a response is received,
    // thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, for e.g.
    // writes to device registers, we do that within release() which is called
    // when node is executed and taken off from readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where new dep-free node is younger
        // instruction than the oldest instruction in-flight. Thus we make sure
        // num_in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum will
        // be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
                 new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}
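
// Worked example for the ROB check above (made-up numbers): with sizeROB =
// 40, oldestInFlightRobNum = 100 and a new node with robNum = 145, the
// occupancy estimate is 145 - 100 = 45 >= 40, so isAvailable() returns
// false and the node waits in depFreeQueue until older nodes retire.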

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const {
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() {
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

void
TraceCPU::FixedRetryGen::regStats()
{
    using namespace Stats;

    numSendAttempted
    .name(name() + ".numSendAttempted")
    .desc("Number of first attempts to send a request")
    ;

    numSendSucceeded
    .name(name() + ".numSendSucceeded")
    .desc("Number of successful first attempts")
    ;

    numSendFailed
    .name(name() + ".numSendFailed")
    .desc("Number of failed first attempts")
    ;

    numRetrySucceeded
    .name(name() + ".numRetrySucceeded")
    .desc("Number of successful retries")
    ;

    instLastTick
    .name(name() + ".instLastTick")
    .desc("Last tick simulated from the fixed inst trace")
    ;
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {

        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++numRetrySucceeded;
    } else {

        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize,
                  currElement.cmd, currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}
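
// Scheduling note for tryNext() above (illustrative ticks): if the packet
// just sent came from a trace record at tick 2000 and the next record is at
// tick 2350, delta becomes 350; schedIcacheNext() then schedules the next
// IcacheGen event at curTick() + tickDelta(), i.e. 350 ticks from now, so
// the fixed-timing trace is replayed with its original spacing.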

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;


    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{

    // Create new request
    Request* req = new Request(addr, size, flags, masterID);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setThreadContext(ContextID(0), ThreadID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call MasterPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }

}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the request and packet to free allocated memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the request and packet to free
    // memory
    delete pkt->req;
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename)
    : trace(filename),
      microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }

    // Assign window size equal to the field in the trace that was recorded
    // when the data dependency trace was captured in the o3cpu model
    windowSize = header_msg.window_size();
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        element->compDelay = pkt_msg.comp_delay();

        // Repeated field robDepList
        element->clearRobDep();
        assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
            element->numRobDep += 1;
        }

        // Repeated field regDepList
        element->clearRegDep();
        assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both a register
            // and an order dependency on another instruction. In such a case,
            // the register dependency is omitted.
            bool duplicate = false;
            for (int j = 0; j < element->numRobDep; j++) {
                duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
            }
            if (!duplicate) {
                element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
                element->numRegDep += 1;
            }
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_asid())
            element->asid = pkt_msg.asid();
        else
            element->asid = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto& own_reg_dep : regDep) {
        if (own_reg_dep == reg_dep) {
            // If register dependency is found, make it zero and return true
            own_reg_dep = 0;
            assert(numRegDep > 0);
            --numRegDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency "
                     "%lli done.\n", seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto& own_rob_dep : robDep) {
        if (own_rob_dep == rob_dep) {
            // If the rob dependency is found, make it zero and return true
            own_rob_dep = 0;
            assert(numRobDep > 0);
            --numRobDep;
            DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
                     "done.\n", seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRegDep() {
    for (auto& own_reg_dep : regDep) {
        own_reg_dep = 0;
    }
    numRegDep = 0;
}

void
TraceCPU::ElasticDataGen::GraphNode::clearRobDep() {
    for (auto& own_rob_dep : robDep) {
        own_rob_dep = 0;
    }
    numRobDep = 0;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an rob dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an rob dependency then it must be a register dependency
        // If the register dependency is not found, it violates an assumption
        // and must be caught by assert.
        bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return (numRobDep == 0 && numRegDep == 0);
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    int i = 0;
    DPRINTFR(TraceCPUData, "robDep:");
    while (robDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
        i++;
    }
    i = 0;
    DPRINTFR(TraceCPUData, "regDep:");
    while (regDep[i] != 0) {
        DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
        i++;
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);
    }

    if (header_msg.tick_freq() != SimClock::Frequency) {
        panic("Trace %s was recorded with a different tick frequency %d\n",
              filename, header_msg.tick_freq());
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}