mem: Use the packet delays and do not just zero them out

This patch updates the I/O devices, bridge and simple memory to take
the packet header and payload delay into account in their latency
calculations. In all cases we add the header delay, i.e. the
accumulated pipeline delay of any crossbars, and the payload delay
needed for deserialisation of any payload.

Because the receive delay adds a latency contribution that is not known
in advance, responses may no longer become ready in arrival order, so the
packet queue of the simple memory is changed to use insertion sorting
based on the time stamp. Moreover, since the memory hands out exclusive
(non-shared) responses, we also need to ensure ordering for reads to the
same address.
This commit is contained in:
Andreas Hansson 2015-11-06 03:26:36 -05:00
parent 8bc925e36d
commit 6b70afd0d4
5 changed files with 52 additions and 18 deletions

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2012 ARM Limited * Copyright (c) 2012, 2015 ARM Limited
* All rights reserved. * All rights reserved.
* *
* The license below extends only to copyright in the software and shall * The license below extends only to copyright in the software and shall
@ -54,12 +54,14 @@ PioPort::PioPort(PioDevice *dev)
Tick Tick
PioPort::recvAtomic(PacketPtr pkt) PioPort::recvAtomic(PacketPtr pkt)
{ {
// @todo: We need to pay for this and not just zero it out // technically the packet only reaches us after the header delay,
// and typically we also need to deserialise any payload
Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
pkt->headerDelay = pkt->payloadDelay = 0; pkt->headerDelay = pkt->payloadDelay = 0;
const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt)); const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt));
assert(pkt->isResponse() || pkt->isError()); assert(pkt->isResponse() || pkt->isError());
return delay; return delay + receive_delay;
} }
AddrRangeList AddrRangeList

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2013 ARM Limited * Copyright (c) 2013, 2015 ARM Limited
* All rights reserved * All rights reserved
* *
* The license below extends only to copyright in the software and shall * The license below extends only to copyright in the software and shall
@ -79,9 +79,15 @@ PciDevice::PciConfigPort::recvAtomic(PacketPtr pkt)
{ {
assert(pkt->getAddr() >= configAddr && assert(pkt->getAddr() >= configAddr &&
pkt->getAddr() < configAddr + PCI_CONFIG_SIZE); pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
// @todo someone should pay for this
// technically the packet only reaches us after the header delay,
// and typically we also need to deserialise any payload
Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
pkt->headerDelay = pkt->payloadDelay = 0; pkt->headerDelay = pkt->payloadDelay = 0;
return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
const Tick delay(pkt->isRead() ? device->readConfig(pkt) :
device->writeConfig(pkt));
return delay + receive_delay;
} }
AddrRangeList AddrRangeList

View file

@ -136,10 +136,14 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt)
DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size()); DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size());
// @todo: We need to pay for this and not just zero it out // technically the packet only reaches us after the header delay,
// and typically we also need to deserialise any payload (unless
// the two sides of the bridge are synchronous)
Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
pkt->headerDelay = pkt->payloadDelay = 0; pkt->headerDelay = pkt->payloadDelay = 0;
slavePort.schedTimingResp(pkt, bridge.clockEdge(delay)); slavePort.schedTimingResp(pkt, bridge.clockEdge(delay) +
receive_delay);
return true; return true;
} }
@ -191,10 +195,15 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt)
} }
if (!retryReq) { if (!retryReq) {
// @todo: We need to pay for this and not just zero it out // technically the packet only reaches us after the header
// delay, and typically we also need to deserialise any
// payload (unless the two sides of the bridge are
// synchronous)
Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
pkt->headerDelay = pkt->payloadDelay = 0; pkt->headerDelay = pkt->payloadDelay = 0;
masterPort.schedTimingReq(pkt, bridge.clockEdge(delay)); masterPort.schedTimingReq(pkt, bridge.clockEdge(delay) +
receive_delay);
} }
} }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2010-2013, 2015 ARM Limited
* All rights reserved * All rights reserved
* *
* The license below extends only to copyright in the software and shall * The license below extends only to copyright in the software and shall
@ -116,7 +116,10 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
return false; return false;
} }
// @todo someone should pay for this // technically the packet only reaches us after the header delay,
// and since this is a memory controller we also need to
// deserialise the payload before performing any write operation
Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
pkt->headerDelay = pkt->payloadDelay = 0; pkt->headerDelay = pkt->payloadDelay = 0;
// update the release time according to the bandwidth limit, and // update the release time according to the bandwidth limit, and
@ -150,10 +153,24 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
// recvAtomic() should already have turned packet into // recvAtomic() should already have turned packet into
// atomic response // atomic response
assert(pkt->isResponse()); assert(pkt->isResponse());
// to keep things simple (and in order), we put the packet at
// the end even if the latency suggests it should be sent Tick when_to_send = curTick() + receive_delay + getLatency();
// before the packet(s) before it
packetQueue.emplace_back(pkt, curTick() + getLatency()); // typically this should be added at the end, so start the
// insertion sort with the last element, also make sure not to
// re-order in front of some existing packet with the same
// address, the latter is important as this memory effectively
// hands out exclusive copies (shared is not asserted)
auto i = packetQueue.end();
--i;
while (i != packetQueue.begin() && when_to_send < i->tick &&
i->pkt->getAddr() != pkt->getAddr())
--i;
// emplace inserts the element before the position pointed to by
// the iterator, so advance it one step
packetQueue.emplace(++i, pkt, when_to_send);
if (!retryResp && !dequeueEvent.scheduled()) if (!retryResp && !dequeueEvent.scheduled())
schedule(dequeueEvent, packetQueue.back().tick); schedule(dequeueEvent, packetQueue.back().tick);
} else { } else {

View file

@ -49,7 +49,7 @@
#ifndef __SIMPLE_MEMORY_HH__ #ifndef __SIMPLE_MEMORY_HH__
#define __SIMPLE_MEMORY_HH__ #define __SIMPLE_MEMORY_HH__
#include <deque> #include <list>
#include "mem/abstract_mem.hh" #include "mem/abstract_mem.hh"
#include "mem/port.hh" #include "mem/port.hh"
@ -125,7 +125,7 @@ class SimpleMemory : public AbstractMemory
* actual memory access. Note that this is where the packet spends * actual memory access. Note that this is where the packet spends
* the memory latency. * the memory latency.
*/ */
std::deque<DeferredPacket> packetQueue; std::list<DeferredPacket> packetQueue;
/** /**
* Bandwidth in ticks per byte. The regulation affects the * Bandwidth in ticks per byte. The regulation affects the