mem: Use the packet delays and do not just zero them out

This patch updates the I/O devices, bridge and simple memory to take the packet header and payload delays into account in their latency calculations. In all cases we add the header delay, i.e. the accumulated pipeline delay of any crossbars, and the payload delay needed for deserialisation of any payload. Because this adds a latency contribution that is not known in advance, the packet queue of the simple memory is changed to use an insertion sort based on the time stamp. Moreover, since the memory hands out exclusive (non-shared) responses, we also need to ensure ordering for reads to the same address.
2015-11-06 03:26:36 -05:00 · 2015-11-06 03:26:36 -05:00 · 6b70afd0d4
commit 6b70afd0d4
parent 8bc925e36d
5 changed files with 52 additions and 18 deletions
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2015 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
@ -54,12 +54,14 @@ PioPort::PioPort(PioDevice *dev)
 Tick
 PioPort::recvAtomic(PacketPtr pkt)
 {
-    // @todo: We need to pay for this and not just zero it out
+    // technically the packet only reaches us after the header delay,
+    // and typically we also need to deserialise any payload
+    Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
    pkt->headerDelay = pkt->payloadDelay = 0;

    const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt));
    assert(pkt->isResponse() || pkt->isError());
-    return delay;
+    return delay + receive_delay;
 }

 AddrRangeList
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 ARM Limited
+ * Copyright (c) 2013, 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
@ -79,9 +79,15 @@ PciDevice::PciConfigPort::recvAtomic(PacketPtr pkt)
 {
    assert(pkt->getAddr() >= configAddr &&
           pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
-    // @todo someone should pay for this
+
+    // technically the packet only reaches us after the header delay,
+    // and typically we also need to deserialise any payload
+    Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
    pkt->headerDelay = pkt->payloadDelay = 0;
-    return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
+
+    const Tick delay(pkt->isRead() ? device->readConfig(pkt) :
+                     device->writeConfig(pkt));
+    return delay + receive_delay;
 }

 AddrRangeList
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@ -136,10 +136,14 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt)

    DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size());

-    // @todo: We need to pay for this and not just zero it out
+    // technically the packet only reaches us after the header delay,
+    // and typically we also need to deserialise any payload (unless
+    // the two sides of the bridge are synchronous)
+    Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
    pkt->headerDelay = pkt->payloadDelay = 0;

-    slavePort.schedTimingResp(pkt, bridge.clockEdge(delay));
+    slavePort.schedTimingResp(pkt, bridge.clockEdge(delay) +
+                              receive_delay);

    return true;
 }
@ -191,10 +195,15 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt)
        }

        if (!retryReq) {
-            // @todo: We need to pay for this and not just zero it out
+            // technically the packet only reaches us after the header
+            // delay, and typically we also need to deserialise any
+            // payload (unless the two sides of the bridge are
+            // synchronous)
+            Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
            pkt->headerDelay = pkt->payloadDelay = 0;

-            masterPort.schedTimingReq(pkt, bridge.clockEdge(delay));
+            masterPort.schedTimingReq(pkt, bridge.clockEdge(delay) +
+                                      receive_delay);
        }
    }

--- a/src/mem/simple_mem.cc
+++ b/src/mem/simple_mem.cc
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2013 ARM Limited
+ * Copyright (c) 2010-2013, 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
@ -116,7 +116,10 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
        return false;
    }

-    // @todo someone should pay for this
+    // technically the packet only reaches us after the header delay,
+    // and since this is a memory controller we also need to
+    // deserialise the payload before performing any write operation
+    Tick receive_delay = pkt->headerDelay + pkt->payloadDelay;
    pkt->headerDelay = pkt->payloadDelay = 0;

    // update the release time according to the bandwidth limit, and
@ -150,10 +153,24 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
        // recvAtomic() should already have turned packet into
        // atomic response
        assert(pkt->isResponse());
-        // to keep things simple (and in order), we put the packet at
-        // the end even if the latency suggests it should be sent
-        // before the packet(s) before it
-        packetQueue.emplace_back(pkt, curTick() + getLatency());
+
+        Tick when_to_send = curTick() + receive_delay + getLatency();
+
+        // typically this should be added at the end, so start the
+        // insertion sort with the last element, also make sure not to
+        // re-order in front of some existing packet with the same
+        // address, the latter is important as this memory effectively
+        // hands out exclusive copies (shared is not asserted)
+        auto i = packetQueue.end();
+        --i;
+        while (i != packetQueue.begin() && when_to_send < i->tick &&
+               i->pkt->getAddr() != pkt->getAddr())
+            --i;
+
+        // emplace inserts the element before the position pointed to by
+        // the iterator, so advance it one step
+        packetQueue.emplace(++i, pkt, when_to_send);
+
        if (!retryResp && !dequeueEvent.scheduled())
            schedule(dequeueEvent, packetQueue.back().tick);
    } else {
--- a/src/mem/simple_mem.hh
+++ b/src/mem/simple_mem.hh
@ -49,7 +49,7 @@
 #ifndef __SIMPLE_MEMORY_HH__
 #define __SIMPLE_MEMORY_HH__

-#include <deque>
+#include <list>

 #include "mem/abstract_mem.hh"
 #include "mem/port.hh"
@ -125,7 +125,7 @@ class SimpleMemory : public AbstractMemory
     * actual memory access. Note that this is where the packet spends
     * the memory latency.
     */
-    std::deque<DeferredPacket> packetQueue;
+    std::list<DeferredPacket> packetQueue;

    /**
     * Bandwidth in ticks per byte. The regulation affects the