From 0bd9dfb8dee9afae4f87b89435c11fa581a71983 Mon Sep 17 00:00:00 2001
From: Nikos Nikoleris <nikos.nikoleris@arm.com>
Date: Mon, 5 Dec 2016 16:48:19 -0500
Subject: [PATCH] mem: Service only the 1st FromCPU MSHR target on
 ReadRespWithInv

A response to a ReadReq can either be a ReadResp or a
ReadRespWithInvalidate. As we add targets to an MSHR for a ReadReq we
assume that the response will be a ReadResp. When the response is
invalidating (ReadRespWithInvalidate) servicing more than one targets
can potentially violate the memory ordering. This change fixes the way
we handle a ReadRespWithInvalidate. When a cache receives a
ReadRespWithInvalidate we service only the first FromCPU target and
all the FromSnoop targets from the MSHR target list. The rest of the
FromCPU targets are deferred and serviced by a new request.

Change-Id: I75c30c268851987ee5f8644acb46f440b4eeeec2
Reviewed-by: Andreas Hansson <andreas.hansson@arm.com>
Reviewed-by: Stephan Diestelhorst <stephan.diestelhorst@arm.com>
---
 src/mem/cache/cache.cc | 18 ++++++---------
 src/mem/cache/mshr.cc  | 50 +++++++++++++++++++++++++++++++++++++-----
 src/mem/cache/mshr.hh  | 34 ++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 19 deletions(-)

diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index db982c1f0..b7f336da9 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -1330,12 +1330,10 @@ Cache::recvTimingResp(PacketPtr pkt)
     int initial_offset = initial_tgt->pkt->getOffset(blkSize);
 
     bool from_cache = false;
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-        Packet *tgt_pkt = target->pkt;
-
-        switch (target->source) {
+    MSHR::TargetList targets = mshr->extractServiceableTargets(pkt);
+    for (auto &target: targets) {
+        Packet *tgt_pkt = target.pkt;
+        switch (target.source) {
           case MSHR::Target::FromCPU:
             Tick completion_time;
             // Here we charge on completion_time the delay of the xbar if the
@@ -1370,7 +1368,7 @@ Cache::recvTimingResp(PacketPtr pkt)
                 mshr->promoteWritable();
                 // NB: we use the original packet here and not the response!
                 blk = handleFill(tgt_pkt, blk, writebacks,
-                                 mshr->allocOnFill());
+                                 targets.allocOnFill);
                 assert(blk != nullptr);
 
                 // treat as a fill, and discard the invalidation
@@ -1400,7 +1398,7 @@ Cache::recvTimingResp(PacketPtr pkt)
 
                 assert(tgt_pkt->req->masterId() < system->maxMasters());
                 missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] +=
-                    completion_time - target->recvTime;
+                    completion_time - target.recvTime;
             } else if (pkt->cmd == MemCmd::UpgradeFailResp) {
                 // failed StoreCond upgrade
                 assert(tgt_pkt->cmd == MemCmd::StoreCondReq ||
@@ -1462,10 +1460,8 @@ Cache::recvTimingResp(PacketPtr pkt)
             break;
 
           default:
-            panic("Illegal target->source enum %d\n", target->source);
+            panic("Illegal target->source enum %d\n", target.source);
         }
-
-        mshr->popTarget();
     }
 
     maintainClusivity(from_cache, blk);
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index 86e77b186..e3ee44cc6 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -190,6 +190,7 @@ MSHR::TargetList::clearDownstreamPending()
             if (mshr != nullptr) {
                 mshr->clearDownstreamPending();
             }
+            t.markedPending = false;
         }
     }
 }
@@ -455,17 +456,54 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
     return true;
 }
 
+MSHR::TargetList
+MSHR::extractServiceableTargets(PacketPtr pkt)
+{
+    TargetList ready_targets;
+    // If the downstream MSHR got an invalidation request then we only
+    // service the first of the FromCPU targets and any other
+    // non-FromCPU target. This way the remaining FromCPU targets
+    // issue a new request and get a fresh copy of the block and we
+    // avoid memory consistency violations.
+    if (pkt->cmd == MemCmd::ReadRespWithInvalidate) {
+        auto it = targets.begin();
+        assert(it->source == Target::FromCPU);
+        ready_targets.push_back(*it);
+        it = targets.erase(it);
+        while (it != targets.end()) {
+            if (it->source == Target::FromCPU) {
+                it++;
+            } else {
+                assert(it->source == Target::FromSnoop);
+                ready_targets.push_back(*it);
+                it = targets.erase(it);
+            }
+        }
+        ready_targets.populateFlags();
+    } else {
+        std::swap(ready_targets, targets);
+    }
+    targets.populateFlags();
+
+    return ready_targets;
+}
 
 bool
 MSHR::promoteDeferredTargets()
 {
-    assert(targets.empty());
-    if (deferredTargets.empty()) {
-        return false;
-    }
+    if (targets.empty())  {
+        if (deferredTargets.empty()) {
+            return false;
+        }
 
-    // swap targets & deferredTargets lists
-    std::swap(targets, deferredTargets);
+        std::swap(targets, deferredTargets);
+    } else {
+        // If the targets list is not empty then we have one targets
+        // from the deferredTargets list to the targets list. A new
+        // request will then service the targets list.
+        targets.splice(targets.end(), deferredTargets);
+        targets.populateFlags();
+    }
 
     // clear deferredTargets flags
     deferredTargets.resetFlags();
diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh
index 437f8d4e4..1f59607bf 100644
--- a/src/mem/cache/mshr.hh
+++ b/src/mem/cache/mshr.hh
@@ -126,8 +126,24 @@ class MSHR : public QueueEntry, public Printable
         const Counter order;  //!< Global order (for memory consistency mgmt)
         const PacketPtr pkt;  //!< Pending request packet.
         const Source source;  //!< Request from cpu, memory, or prefetcher?
-        const bool markedPending; //!< Did we mark upstream MSHR
-                                  //!< as downstreamPending?
+
+        /**
+         * We use this flag to track whether we have cleared the
+         * downstreamPending flag for the MSHR of the cache above
+         * where this packet originates from and guard noninitial
+         * attempts to clear it.
+         *
+         * The flag markedPending needs to be updated when the
+         * TargetList is in service which can be:
+         * 1) during the Target instantiation if the MSHR is in
+         * service and the target is not deferred,
+         * 2) when the MSHR becomes in service if the target is not
+         * deferred,
+         * 3) or when the TargetList is promoted (deferredTargets ->
+         * targets).
+         */
+        bool markedPending;
+
         const bool allocOnFill;   //!< Should the response servicing this
                                   //!< target list allocate in the cache?
 
@@ -296,6 +312,20 @@ class MSHR : public QueueEntry, public Printable
     int getNumTargets() const
     { return targets.size() + deferredTargets.size(); }
 
+    /**
+     * Extracts the subset of the targets that can be serviced given a
+     * received response. This function returns the targets list
+     * unless the response is a ReadRespWithInvalidate. The
+     * ReadRespWithInvalidate is only invalidating response that its
+     * invalidation was not expected when the request (a
+     * ReadSharedReq) was sent out. For ReadRespWithInvalidate we can
+     * safely service only the first FromCPU target and all FromSnoop
+     * targets (inform all snoopers that we no longer have the block).
+     *
+     * @param pkt The response from the downstream memory
+     */
+    TargetList extractServiceableTargets(PacketPtr pkt);
+
     /**
      * Returns true if there are targets left.
      * @return true if there are targets