From 2a675aecb904143327befde70704d87c85fe7ea5 Mon Sep 17 00:00:00 2001
From: Andreas Hansson <andreas.hansson@arm.com>
Date: Mon, 19 Aug 2013 03:52:25 -0400
Subject: [PATCH] mem: Add an internal packet queue in SimpleMemory

This patch adds a packet queue in SimpleMemory to avoid using the
packet queue in the port (and thus have no involvement in the flow
control). The port queue was bound to 100 packets, and as the
SimpleMemory is modelling both a controller and an actual RAM, it
potentially has a large number of packets in flight. There is
currently no limit on the number of packets in the memory controller,
but this could easily be added in a follow-on patch.

As a result of the added internal storage, the functional access and
the draining are updated. Some minor cleaning up and renaming has
also been done.

The memtest regression changes as a result of this patch and the
stats will be updated.
---
 src/mem/SimpleMemory.py |   2 +-
 src/mem/simple_mem.cc   | 117 +++++++++++++++++++++++++++-------------
 src/mem/simple_mem.hh   | 101 +++++++++++++++++++++++++++-------
 3 files changed, 164 insertions(+), 56 deletions(-)

diff --git a/src/mem/SimpleMemory.py b/src/mem/SimpleMemory.py
index 0cf6dece3..0a90eaa7c 100644
--- a/src/mem/SimpleMemory.py
+++ b/src/mem/SimpleMemory.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012 ARM Limited
+# Copyright (c) 2012-2013 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc
index 3492360cd..74d5c4ab2 100644
--- a/src/mem/simple_mem.cc
+++ b/src/mem/simple_mem.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -49,9 +49,10 @@ using namespace std;
 
 SimpleMemory::SimpleMemory(const SimpleMemoryParams* p) :
     AbstractMemory(p),
-    port(name() + ".port", *this), lat(p->latency),
-    lat_var(p->latency_var), bandwidth(p->bandwidth),
-    isBusy(false), retryReq(false), releaseEvent(this)
+    port(name() + ".port", *this), latency(p->latency),
+    latency_var(p->latency_var), bandwidth(p->bandwidth), isBusy(false),
+    retryReq(false), retryResp(false),
+    releaseEvent(this), dequeueEvent(this), drainManager(NULL)
 {
 }
 
@@ -66,29 +67,24 @@
 }
 
 Tick
-SimpleMemory::calculateLatency(PacketPtr pkt)
-{
-    if (pkt->memInhibitAsserted()) {
-        return 0;
-    } else {
-        Tick latency = lat;
-        if (lat_var != 0)
-            latency += random_mt.random(0, lat_var);
-        return latency;
-    }
-}
-
-Tick
-SimpleMemory::doAtomicAccess(PacketPtr pkt)
+SimpleMemory::recvAtomic(PacketPtr pkt)
 {
     access(pkt);
-    return calculateLatency(pkt);
+    return pkt->memInhibitAsserted() ? 0 : getLatency();
 }
 
 void
-SimpleMemory::doFunctionalAccess(PacketPtr pkt)
+SimpleMemory::recvFunctional(PacketPtr pkt)
 {
+    pkt->pushLabel(name());
+
     functionalAccess(pkt);
+
+    // potentially update the packets in our packet queue as well
+    for (auto i = packetQueue.begin(); i != packetQueue.end(); ++i)
+        pkt->checkFunctional(i->pkt);
+
+    pkt->popLabel();
 }
 
 bool
@@ -149,13 +145,18 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
     // go ahead and deal with the packet and put the response in the
     // queue if there is one
     bool needsResponse = pkt->needsResponse();
-    Tick latency = doAtomicAccess(pkt);
+    recvAtomic(pkt);
     // turn packet around to go back to requester if response expected
     if (needsResponse) {
-        // doAtomicAccess() should already have turned packet into
+        // recvAtomic() should already have turned packet into
         // atomic response
         assert(pkt->isResponse());
-        port.schedTimingResp(pkt, curTick() + latency);
+        // to keep things simple (and in order), we put the packet at
+        // the end even if the latency suggests it should be sent
+        // before the packet(s) before it
+        packetQueue.push_back(DeferredPacket(pkt, curTick() + getLatency()));
+        if (!dequeueEvent.scheduled())
+            schedule(dequeueEvent, packetQueue.back().tick);
     } else {
         pendingDelete.push_back(pkt);
     }
@@ -174,6 +175,46 @@
     }
 }
 
+void
+SimpleMemory::dequeue()
+{
+    assert(!packetQueue.empty());
+    DeferredPacket deferred_pkt = packetQueue.front();
+
+    retryResp = !port.sendTimingResp(deferred_pkt.pkt);
+
+    if (!retryResp) {
+        packetQueue.pop_front();
+
+        // if the queue is not empty, schedule the next dequeue event,
+        // otherwise signal that we are drained if we were asked to do so
+        if (!packetQueue.empty()) {
+            // if there were packets that got in-between then we
+            // already have an event scheduled, so use re-schedule
+            reschedule(dequeueEvent,
+                       std::max(packetQueue.front().tick, curTick()), true);
+        } else if (drainManager) {
+            drainManager->signalDrainDone();
+            drainManager = NULL;
+        }
+    }
+}
+
+Tick
+SimpleMemory::getLatency() const
+{
+    return latency +
+        (latency_var ? random_mt.random(0, latency_var) : 0);
+}
+
+void
+SimpleMemory::recvRetry()
+{
+    assert(retryResp);
+
+    dequeue();
+}
+
 BaseSlavePort &
 SimpleMemory::getSlavePort(const std::string &if_name, PortID idx)
 {
@@ -187,7 +228,13 @@
 unsigned int
 SimpleMemory::drain(DrainManager *dm)
 {
-    int count = port.drain(dm);
+    int count = 0;
+
+    // also track our internal queue
+    if (!packetQueue.empty()) {
+        count += 1;
+        drainManager = dm;
+    }
 
     if (count)
         setDrainState(Drainable::Draining);
@@ -198,8 +245,7 @@
 
 SimpleMemory::MemoryPort::MemoryPort(const std::string& _name,
                                      SimpleMemory& _memory)
-    : QueuedSlavePort(_name, &_memory, queueImpl),
-      queueImpl(_memory, *this), memory(_memory)
+    : SlavePort(_name, &_memory), memory(_memory)
 { }
 
 AddrRangeList
@@ -213,22 +259,13 @@
 Tick
 SimpleMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 {
-    return memory.doAtomicAccess(pkt);
+    return memory.recvAtomic(pkt);
 }
 
 void
 SimpleMemory::MemoryPort::recvFunctional(PacketPtr pkt)
 {
-    pkt->pushLabel(memory.name());
-
-    if (!queue.checkFunctional(pkt)) {
-        // Default implementation of SimpleTimingPort::recvFunctional()
-        // calls recvAtomic() and throws away the latency; we can save a
-        // little here by just not calculating the latency.
-        memory.doFunctionalAccess(pkt);
-    }
-
-    pkt->popLabel();
+    memory.recvFunctional(pkt);
 }
 
 bool
@@ -237,6 +274,12 @@ SimpleMemory::MemoryPort::recvTimingReq(PacketPtr pkt)
 {
     return memory.recvTimingReq(pkt);
 }
 
+void
+SimpleMemory::MemoryPort::recvRetry()
+{
+    memory.recvRetry();
+}
+
 SimpleMemory*
 SimpleMemoryParams::create()
 {
diff --git a/src/mem/simple_mem.hh b/src/mem/simple_mem.hh
index ab002f270..ba4b8bdf1 100644
--- a/src/mem/simple_mem.hh
+++ b/src/mem/simple_mem.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -49,15 +49,16 @@
 #ifndef __SIMPLE_MEMORY_HH__
 #define __SIMPLE_MEMORY_HH__
 
+#include <deque>
+
 #include "mem/abstract_mem.hh"
-#include "mem/tport.hh"
+#include "mem/port.hh"
 #include "params/SimpleMemory.hh"
 
 /**
  * The simple memory is a basic single-ported memory controller with
- * an configurable throughput and latency, potentially with a variance
- * added to the latter. It uses a QueueSlavePort to avoid dealing with
- * the flow control of sending responses.
+ * a configurable throughput and latency.
+ *
  * @sa \ref gem5MemorySystem "gem5 Memory System"
  */
 class SimpleMemory : public AbstractMemory
@@ -65,13 +66,27 @@
 
   private:
 
-    class MemoryPort : public QueuedSlavePort
+    /**
+     * A deferred packet stores a packet along with its scheduled
+     * transmission time
+     */
+    class DeferredPacket
+    {
+
+      public:
+
+        const Tick tick;
+        const PacketPtr pkt;
+
+        DeferredPacket(PacketPtr _pkt, Tick _tick) : tick(_tick), pkt(_pkt)
+        { }
+    };
+
+    class MemoryPort : public SlavePort
     {
 
       private:
 
-        /// Queue holding the response packets
-        SlavePacketQueue queueImpl;
 
         SimpleMemory& memory;
 
      public:
@@ -86,16 +101,37 @@
         bool recvTimingReq(PacketPtr pkt);
 
+        void recvRetry();
+
         AddrRangeList getAddrRanges() const;
 
     };
 
     MemoryPort port;
 
-    Tick lat;
-    Tick lat_var;
+    /**
+     * Latency from when a request is accepted until the response is
+     * ready to be sent.
+     */
+    const Tick latency;
 
-    /// Bandwidth in ticks per byte
+    /**
+     * Fudge factor added to the latency.
+     */
+    const Tick latency_var;
+
+    /**
+     * Internal (unbounded) storage to mimic the delay caused by the
+     * actual memory access. Note that this is where the packet spends
+     * the memory latency.
+     */
+    std::deque<DeferredPacket> packetQueue;
+
+    /**
+     * Bandwidth in ticks per byte. The regulation affects the
+     * acceptance rate of requests and the queueing takes place after
+     * the regulation.
+     */
     const double bandwidth;
 
     /**
@@ -110,6 +146,12 @@
      */
     bool retryReq;
 
+    /**
+     * Remember if we failed to send a response and are awaiting a
+     * retry. This is only used as a check.
+     */
+    bool retryResp;
+
     /**
      * Release the memory after being busy and send a retry if a
     * request was rejected in the meanwhile.
      */
     EventWrapper<SimpleMemory, &SimpleMemory::release> releaseEvent;
 
+    /**
+     * Dequeue a packet from our internal packet queue and move it to
+     * the port where it will be sent as soon as possible.
+     */
+    void dequeue();
+
+    EventWrapper<SimpleMemory, &SimpleMemory::dequeue> dequeueEvent;
+
+    /**
+     * Determine the latency.
+     *
+     * @return the latency seen by the current packet
+     */
+    Tick getLatency() const;
+
     /** @todo this is a temporary workaround until the 4-phase code is
      * committed. upstream caches needs this packet until true is returned, so
      * hold onto it for deletion until a subsequent call */
     std::vector<PacketPtr> pendingDelete;
 
+    /**
+     * If we need to drain, keep the drain manager around until we're
+     * done here.
+     */
+    DrainManager *drainManager;
+
   public:
 
     SimpleMemory(const SimpleMemoryParams *p);
-    virtual ~SimpleMemory() { }
 
     unsigned int drain(DrainManager *dm);
 
-    virtual BaseSlavePort& getSlavePort(const std::string& if_name,
-                                        PortID idx = InvalidPortID);
-    virtual void init();
+    BaseSlavePort& getSlavePort(const std::string& if_name,
+                                PortID idx = InvalidPortID);
+    void init();
 
   protected:
 
-    Tick doAtomicAccess(PacketPtr pkt);
-    void doFunctionalAccess(PacketPtr pkt);
+    Tick recvAtomic(PacketPtr pkt);
+
+    void recvFunctional(PacketPtr pkt);
+
     bool recvTimingReq(PacketPtr pkt);
-    Tick calculateLatency(PacketPtr pkt);
+
+    void recvRetry();
 };
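
Illustrative sketch (not part of the patch): the change above gives SimpleMemory its own unbounded deque of DeferredPacket entries, so a response sits in the memory for the configured latency and is then sent strictly in order, with a retry when the port rejects it. The toy program below reduces that pattern to plain C++; ToyMemory, Packet, the portBusy flag and the tick loop are made-up stand-ins for gem5's SimpleMemory, Packet, sendTimingResp and event scheduling, not code from the patch.

#include <cstdint>
#include <deque>
#include <iostream>

using Tick = uint64_t;

struct Packet { int id; };

// A packet paired with the tick at which it may leave the memory,
// mirroring the DeferredPacket class added in simple_mem.hh.
struct DeferredPacket
{
    Tick tick;
    Packet* pkt;
};

class ToyMemory
{
  public:
    explicit ToyMemory(Tick lat) : latency(lat) {}

    // Accept a request: its response is released 'latency' ticks later and
    // always goes to the back of the queue, preserving request order.
    void recvTimingReq(Packet* pkt, Tick now)
    {
        queue.push_back({now + latency, pkt});
    }

    // Try to send the head-of-queue response once its tick has passed.
    // Returns true if the "port" accepted it; a busy port means the packet
    // stays at the head until a retry, as SimpleMemory::dequeue() does.
    bool dequeue(Tick now, bool portBusy)
    {
        if (queue.empty() || queue.front().tick > now)
            return false;
        if (portBusy)
            return false;
        std::cout << "tick " << now << ": responding to packet "
                  << queue.front().pkt->id << '\n';
        queue.pop_front();
        return true;
    }

  private:
    const Tick latency;
    std::deque<DeferredPacket> queue;
};

int main()
{
    ToyMemory mem(30);
    Packet a{1}, b{2};

    mem.recvTimingReq(&a, 0);   // response ready at tick 30
    mem.recvTimingReq(&b, 10);  // response ready at tick 40

    for (Tick t = 0; t <= 50; t += 10)
        mem.dequeue(t, /* portBusy = */ t == 30); // tick 30 attempt is rejected
    return 0;
}

Keeping the queue strictly in order, even when a later packet's latency would in principle let it overtake an earlier one, is the same simplification the patch makes in SimpleMemory::recvTimingReq().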