mem: Clarification of packet crossbar timings

This patch clarifies the timing annotations that are added to a
packet when it passes through a crossbar.

The old 'firstWordDelay' is replaced by 'headerDelay', which represents
the delay associated with the delivery of the header of the packet.

The old 'lastWordDelay' is replaced by 'payloadDelay', which represents
the delay needed to process the payload of the packet.

For now the uses and values remain identical. However, going forward
the payloadDelay will be additive, and not include the
headerDelay. Follow-on patches will make the headerDelay capture the
pipeline latency incurred in the crossbar, whereas the payloadDelay
will capture the additional serialisation delay.
This commit is contained in:
Marco Balboni 2015-02-11 10:23:47 -05:00
parent e2828587b3
commit 268d9e59c5
15 changed files with 65 additions and 61 deletions

View file

@ -601,7 +601,7 @@ Walker::WalkerState::recvPacket(PacketPtr pkt)
assert(!read);
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
state = nextState;
nextState = Ready;

View file

@ -55,7 +55,7 @@ Tick
PioPort::recvAtomic(PacketPtr pkt)
{
// @todo: We need to pay for this and not just zero it out
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt));
assert(pkt->isResponse() || pkt->isError());

View file

@ -80,7 +80,7 @@ PciDevice::PciConfigPort::recvAtomic(PacketPtr pkt)
assert(pkt->getAddr() >= configAddr &&
pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
}

View file

@ -82,7 +82,7 @@ class IntDevice
Tick recvMessage(PacketPtr pkt)
{
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
return device->recvMessage(pkt);
}
};

View file

@ -137,7 +137,7 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt)
DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size());
// @todo: We need to pay for this and not just zero it out
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
slavePort.schedTimingResp(pkt, bridge.clockEdge(delay));
@ -181,7 +181,7 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt)
if (!retryReq) {
// @todo: We need to pay for this and not just zero it out
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
masterPort.schedTimingReq(pkt, bridge.clockEdge(delay));
}

View file

@ -419,7 +419,7 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
pkt->popSenderState();
delete rec;
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// forwardLatency is set here because there is a response from an
// upper level cache.
memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
@ -486,7 +486,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// also reset the bus time that the original packet has
// not yet paid for
snoop_pkt->firstWordDelay = snoop_pkt->lastWordDelay = 0;
snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0;
// make this an instantaneous express snoop, and let the
// other caches in the system know that the packet is
@ -521,7 +521,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
uncacheableFlush(pkt);
// @todo: someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// writes go in write buffer, reads use MSHR,
// prefetches are acknowledged (responded to) and dropped
@ -579,7 +579,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
if (needsResponse) {
pkt->makeTimingResponse();
// @todo: Make someone pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// In this case we are considering lat neglecting
// responseLatency, modelling hit latency just as
@ -598,7 +598,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// miss
// @todo: Make someone pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
Addr blk_addr = blockAlign(pkt->getAddr());
MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
@ -1146,8 +1146,8 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
// from lower level caches/memory to an upper level cache or
// the core.
completion_time = clockEdge(responseLatency) +
(transfer_offset ? pkt->lastWordDelay :
pkt->firstWordDelay);
(transfer_offset ? pkt->payloadDelay :
pkt->headerDelay);
assert(!target->pkt->req->isUncacheable());
@ -1163,14 +1163,14 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
// from lower level caches/memory to an upper level cache or
// the core.
completion_time = clockEdge(responseLatency) +
pkt->lastWordDelay;
pkt->payloadDelay;
target->pkt->req->setExtraData(0);
} else {
// not a cache fill, just forwarding response
// responseLatency is the latency of the return path
// from lower level cahces/memory to the core.
completion_time = clockEdge(responseLatency) +
pkt->lastWordDelay;
pkt->payloadDelay;
if (pkt->isRead() && !is_error) {
target->pkt->setData(pkt->getConstPtr<uint8_t>());
}
@ -1190,7 +1190,7 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
target->pkt->getAddr());
}
// reset the bus additional time as it is now accounted for
target->pkt->firstWordDelay = target->pkt->lastWordDelay = 0;
target->pkt->headerDelay = target->pkt->payloadDelay = 0;
cpuSidePort->schedTimingResp(target->pkt, completion_time);
break;
@ -1239,7 +1239,7 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
mq = mshr->queue;
mq->markPending(mshr);
requestMemSideBus((RequestCause)mq->index, clockEdge() +
pkt->lastWordDelay);
pkt->payloadDelay);
} else {
mq->deallocate(mshr);
if (wasFull && !mq->isFull()) {
@ -1512,7 +1512,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
}
// We pay for fillLatency here.
blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
pkt->lastWordDelay;
pkt->payloadDelay;
return blk;
}
@ -1548,7 +1548,7 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
assert(req_pkt->isInvalidate() || pkt->sharedAsserted());
pkt->makeTimingResponse();
// @todo Make someone pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
if (pkt->isRead()) {
pkt->setDataFromBlock(blk_data, blkSize);
}
@ -1599,7 +1599,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
snoopPkt.pushSenderState(new ForwardResponseRecord());
// the snoop packet does not need to wait any additional
// time
snoopPkt.firstWordDelay = snoopPkt.lastWordDelay = 0;
snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
cpuSidePort->sendTimingSnoopReq(&snoopPkt);
if (snoopPkt.memInhibitAsserted()) {
// cache-to-cache response from some upper cache

View file

@ -168,7 +168,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->lastWordDelay + curTick();
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// uncacheable requests need never be snooped
if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@ -222,7 +222,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
assert(!pkt->memInhibitAsserted());
// undo the calculation so we can check for 0 again
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
@ -301,7 +301,7 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->lastWordDelay + curTick();
Tick packetFinishTime = curTick() + pkt->payloadDelay;
if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) {
// let the snoop filter inspect the response and update its state
@ -427,7 +427,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
assert(!pkt->isExpressSnoop());
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->lastWordDelay + curTick();
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// forward it either as a snoop response or a normal response
if (forwardAsSnoop) {
@ -608,8 +608,8 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
transDist[pkt_cmd]++;
}
// @todo: Not setting first-word time
pkt->lastWordDelay = response_latency;
// @todo: Not setting header time
pkt->payloadDelay = response_latency;
return response_latency;
}
@ -648,8 +648,8 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
snoops++;
}
// @todo: Not setting first-word time
pkt->lastWordDelay = snoop_response_latency;
// @todo: Not setting header time
pkt->payloadDelay = snoop_response_latency;
return snoop_response_latency;
}

View file

@ -879,7 +879,7 @@ DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
assert(pkt->isResponse());
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// queue the packet in the response queue to be sent out after
// the static latency has passed

View file

@ -270,7 +270,7 @@ DRAMSim2::accessAndRespond(PacketPtr pkt)
assert(pkt->isResponse());
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
DPRINTF(DRAMSim2, "Queuing response for address %lld\n",
pkt->getAddr());

View file

@ -124,8 +124,8 @@ void
StubSlavePort::ResponseEvent::process()
{
owner.responsePacket->makeResponse();
owner.responsePacket->firstWordDelay = 0;
owner.responsePacket->lastWordDelay = 0;
owner.responsePacket->headerDelay = 0;
owner.responsePacket->payloadDelay = 0;
if (owner.sendTimingResp(owner.responsePacket)) {
owner.responsePacket = NULL;

View file

@ -128,7 +128,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->lastWordDelay + curTick();
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// before forwarding the packet (and possibly altering it),
// remember if we are expecting a response
@ -146,7 +146,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
src_port->name(), pkt->cmdString(), pkt->getAddr());
// undo the calculation so we can check for 0 again
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// occupy until the header is sent
reqLayers[master_port_id]->failedTiming(src_port,
@ -201,7 +201,7 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->lastWordDelay + curTick();
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// send the packet through the destination slave port
bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt);
@ -265,7 +265,7 @@ NoncoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
}
// @todo: Not setting first-word time
pkt->lastWordDelay = response_latency;
pkt->payloadDelay = response_latency;
return response_latency;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012-2014 ARM Limited
* Copyright (c) 2012-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@ -313,23 +313,23 @@ class Packet : public Printable
public:
/**
* The extra delay from seeing the packet until the first word is
* The extra delay from seeing the packet until the header is
* transmitted. This delay is used to communicate the crossbar
* forwarding latency to the neighbouring object (e.g. a cache)
* that actually makes the packet wait. As the delay is relative,
* a 32-bit unsigned should be sufficient.
*/
uint32_t firstWordDelay;
uint32_t headerDelay;
/**
* The extra pipelining delay from seeing the packet until the
* last word is transmitted by the component that provided it (if
* any). This includes the first word delay. Similar to the first
* word delay, this is used to make up for the fact that the
* The extra pipelining delay from seeing the packet until the end of
* payload is transmitted by the component that provided it (if
* any). This includes the header delay. Similar to the header
* delay, this is used to make up for the fact that the
* crossbar does not make the packet wait. As the delay is
* relative, a 32-bit unsigned should be sufficient.
*/
uint32_t lastWordDelay;
uint32_t payloadDelay;
/**
* A virtual base opaque structure used to hold state associated
@ -574,7 +574,7 @@ class Packet : public Printable
Packet(const RequestPtr _req, MemCmd _cmd)
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
size(0), bytesValidStart(0), bytesValidEnd(0),
firstWordDelay(0), lastWordDelay(0),
headerDelay(0), payloadDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
@ -596,7 +596,7 @@ class Packet : public Printable
Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
bytesValidStart(0), bytesValidEnd(0),
firstWordDelay(0), lastWordDelay(0),
headerDelay(0), payloadDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
@ -621,8 +621,8 @@ class Packet : public Printable
addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
bytesValidStart(pkt->bytesValidStart),
bytesValidEnd(pkt->bytesValidEnd),
firstWordDelay(pkt->firstWordDelay),
lastWordDelay(pkt->lastWordDelay),
headerDelay(pkt->headerDelay),
payloadDelay(pkt->payloadDelay),
senderState(pkt->senderState)
{
if (!clear_flags)

View file

@ -125,7 +125,7 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
}
// @todo someone should pay for this
pkt->firstWordDelay = pkt->lastWordDelay = 0;
pkt->headerDelay = pkt->payloadDelay = 0;
// update the release time according to the bandwidth limit, and
// do so with respect to the time it takes to finish this request

View file

@ -109,23 +109,27 @@ BaseXBar::calcPacketTiming(PacketPtr pkt)
// until the next clock edge (could be zero)
Tick offset = clockEdge() - curTick();
// determine how many cycles are needed to send the data
// Determine how many cycles are needed to send the data
// If the packet has no data we take into account just the cycle to send
// the header.
unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
// before setting the bus delay fields of the packet, ensure that
// the delay from any previous crossbar has been accounted for
if (pkt->firstWordDelay != 0 || pkt->lastWordDelay != 0)
if (pkt->headerDelay != 0 || pkt->payloadDelay != 0)
panic("Packet %s already has delay (%d, %d) that should be "
"accounted for.\n", pkt->cmdString(), pkt->firstWordDelay,
pkt->lastWordDelay);
"accounted for.\n", pkt->cmdString(), pkt->headerDelay,
pkt->payloadDelay);
// The first word will be delivered on the cycle after the header.
pkt->firstWordDelay = (headerCycles + 1) * clockPeriod() + offset;
// The headerDelay takes into account the relative time to deliver the
// header of the packet. It will be charged of the additional delay of
// the xbar if the packet goes through it.
pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset;
// Note that currently lastWordDelay can be smaller than
// firstWordDelay if the packet has no data
pkt->lastWordDelay = (headerCycles + dataCycles) * clockPeriod() +
offset;
// The payloadDelay takes into account the relative time to deliver the
// payload of the packet. If the packet has no data its value is just one
// tick (due to header) plus the offset value.
pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset;
}
template <typename SrcType, typename DstType>

View file

@ -333,9 +333,9 @@ class BaseXBar : public MemObject
/**
* Calculate the timing parameters for the packet. Updates the
* firstWordDelay and lastWordDelay fields of the packet
* headerDelay and payloadDelay fields of the packet
* object with the relative number of ticks required to transmit
* the header and the first word, and the last word, respectively.
* the header and the payload, respectively.
*/
void calcPacketTiming(PacketPtr pkt);