From 94f94fbc555356d15c4f06aeb1a1fb5da53c365f Mon Sep 17 00:00:00 2001
From: Andreas Hansson <andreas.hansson@arm.com>
Date: Fri, 12 Aug 2016 14:11:45 +0100
Subject: [PATCH] mem: Update mostly exclusive cache policy to cover more cases

This patch changes how the mostly exclusive policy is enforced to
ensure that we drop blocks when we should. As part of this change, the
actual invalidation due to the clusivity enforcement is moved outside
the hit handling, to a separate method maintainClusivity. For the
timing mode that means we can deal with all MSHR targets before taking
any action and possibly dropping the block. The method
satisfyCpuSideRequest is also renamed satisfyRequest as part of this
change (since we only ever see requests from the cpu-side port).

Change-Id: If6f3d1e0c3e7be9a67b72a55e4fc2ec4a90fd3d2
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Tony Gutierrez <anthony.gutierrez@amd.com>
---
 src/mem/cache/cache.cc | 83 ++++++++++++++++++++++++------------------
 src/mem/cache/cache.hh | 31 ++++++++++++++--
 2 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index c3f289123..10f39db3d 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -146,8 +146,8 @@ Cache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
 
 
 void
-Cache::satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
-                             bool deferred_response, bool pending_downgrade)
+Cache::satisfyRequest(PacketPtr pkt, CacheBlk *blk,
+                      bool deferred_response, bool pending_downgrade)
 {
     assert(pkt->isRequest());
 
@@ -230,30 +230,22 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
                         // (cacheResponding set, hasSharers not set)
                         pkt->setCacheResponding();
 
-                        if (clusivity == Enums::mostly_excl) {
-                            // if this cache is mostly exclusive with
-                            // respect to the cache above, drop the
-                            // block, no need to first unset the dirty
-                            // bit
-                            invalidateBlock(blk);
-                        } else {
-                            // if this cache is mostly inclusive, we
-                            // keep the block in the Exclusive state,
-                            // and pass it upwards as Modified
-                            // (writable and dirty), hence we have
-                            // multiple caches, all on the same path
-                            // towards memory, all considering the
-                            // same block writable, but only one
-                            // considering it Modified
+                        // if this cache is mostly inclusive, we
+                        // keep the block in the Exclusive state,
+                        // and pass it upwards as Modified
+                        // (writable and dirty), hence we have
+                        // multiple caches, all on the same path
+                        // towards memory, all considering the
+                        // same block writable, but only one
+                        // considering it Modified
 
-                            // we get away with multiple caches (on
-                            // the same path to memory) considering
-                            // the block writeable as we always enter
-                            // the cache hierarchy through a cache,
-                            // and first snoop upwards in all other
-                            // branches
-                            blk->status &= ~BlkDirty;
-                        }
+                        // we get away with multiple caches (on
+                        // the same path to memory) considering
+                        // the block writeable as we always enter
+                        // the cache hierarchy through a cache,
+                        // and first snoop upwards in all other
+                        // branches
+                        blk->status &= ~BlkDirty;
                     } else {
                         // if we're responding after our own miss,
                         // there's a window where the recipient didn't
@@ -433,12 +425,13 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
         // like a Writeback which could not find a replaceable block so has to
         // go to next level.
         return false;
-    } else if ((blk != nullptr) &&
-               (pkt->needsWritable() ? blk->isWritable() :
-                blk->isReadable())) {
+    } else if (blk && (pkt->needsWritable() ? blk->isWritable() :
+                       blk->isReadable())) {
         // OK to satisfy access
         incHitCount(pkt);
-        satisfyCpuSideRequest(pkt, blk);
+        satisfyRequest(pkt, blk);
+        maintainClusivity(pkt->fromCache(), blk);
+
         return true;
     }
 
@@ -456,6 +449,18 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
     return false;
 }
 
+void
+Cache::maintainClusivity(bool from_cache, CacheBlk *blk)
+{
+    if (from_cache && blk && blk->isValid() && !blk->isDirty() &&
+        clusivity == Enums::mostly_excl) {
+        // we responded to a cache and still hold a valid, clean
+        // copy: being mostly exclusive with respect to the cache
+        // above, drop the block (dirty blocks are kept)
+        invalidateBlock(blk);
+    }
+}
+
 void
 Cache::doWritebacks(PacketList& writebacks, Tick forward_time)
 {
@@ -1073,14 +1078,15 @@ Cache::recvAtomic(PacketPtr pkt)
                                      allocOnFill(pkt->cmd));
                     assert(blk != NULL);
                     is_invalidate = false;
-                    satisfyCpuSideRequest(pkt, blk);
+                    satisfyRequest(pkt, blk);
                 } else if (bus_pkt->isRead() ||
                            bus_pkt->cmd == MemCmd::UpgradeResp) {
                     // we're updating cache state to allow us to
                     // satisfy the upstream request from the cache
                     blk = handleFill(bus_pkt, blk, writebacks,
                                      allocOnFill(pkt->cmd));
-                    satisfyCpuSideRequest(pkt, blk);
+                    satisfyRequest(pkt, blk);
+                    maintainClusivity(pkt->fromCache(), blk);
                 } else {
                     // we're satisfying the upstream request without
                     // modifying cache state, e.g., a write-through
@@ -1318,6 +1324,8 @@ Cache::recvTimingResp(PacketPtr pkt)
     // First offset for critical word first calculations
     int initial_offset = initial_tgt->pkt->getOffset(blkSize);
 
+    bool from_cache = false;
+
     while (mshr->hasTargets()) {
         MSHR::Target *target = mshr->getTarget();
         Packet *tgt_pkt = target->pkt;
@@ -1340,6 +1348,10 @@ Cache::recvTimingResp(PacketPtr pkt)
                 break; // skip response
             }
 
+            // keep track of whether we have responded to another
+            // cache
+            from_cache = from_cache || tgt_pkt->fromCache();
+
             // unlike the other packet flows, where data is found in other
             // caches or memory and brought back, write-line requests always
             // have the data right away, so the above check for "is fill?"
@@ -1362,8 +1374,7 @@ Cache::recvTimingResp(PacketPtr pkt)
             }
 
             if (is_fill) {
-                satisfyCpuSideRequest(tgt_pkt, blk,
-                                      true, mshr->hasPostDowngrade());
+                satisfyRequest(tgt_pkt, blk, true, mshr->hasPostDowngrade());
 
                 // How many bytes past the first request is this one
                 int transfer_offset =
@@ -1451,6 +1462,8 @@ Cache::recvTimingResp(PacketPtr pkt)
         mshr->popTarget();
     }
 
+    maintainClusivity(from_cache, blk);
+
     if (blk && blk->isValid()) {
         // an invalidate response stemming from a write line request
         // should not invalidate the block, so check if the
@@ -1725,7 +1738,7 @@ Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
 
         // only read responses and write-line requests have data;
         // note that we don't write the data here for write-line - that
-        // happens in the subsequent satisfyCpuSideRequest.
+        // happens in the subsequent call to satisfyRequest
         assert(pkt->isRead() || pkt->cmd == MemCmd::WriteLineReq);
 
         // need to do a replacement if allocating, otherwise we stick
@@ -1764,7 +1777,7 @@ Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
 
     // sanity check for whole-line writes, which should always be
     // marked as writable as part of the fill, and then later marked
-    // dirty as part of satisfyCpuSideRequest
+    // dirty as part of satisfyRequest
     if (pkt->cmd == MemCmd::WriteLineReq) {
         assert(!pkt->hasSharers());
         // at the moment other caches do not respond to the
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index e1c99ea04..4f9142f7c 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -302,6 +302,17 @@ class Cache : public BaseCache
      */
     void invalidateBlock(CacheBlk *blk);
 
+    /**
+     * Maintain the clusivity of this cache by potentially
+     * invalidating a block. This method works in conjunction with
+     * satisfyRequest, but is separate to allow us to handle all MSHR
+     * targets before potentially dropping a block.
+     *
+     * @param from_cache Whether we have dealt with a packet from a cache
+     * @param blk The block that should potentially be dropped
+     */
+    void maintainClusivity(bool from_cache, CacheBlk *blk);
+
     /**
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes two memory requests. One
@@ -401,10 +412,22 @@ class Cache : public BaseCache
      */
     void functionalAccess(PacketPtr pkt, bool fromCpuSide);
 
-    void satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
-                               bool deferred_response = false,
-                               bool pending_downgrade = false);
-    bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, CacheBlk *blk);
+    /**
+     * Perform any necessary updates to the block and perform any data
+     * exchange between the packet and the block. The flags of the
+     * packet are also set accordingly.
+     *
+     * @param pkt Request packet from upstream that hit a block
+     * @param blk Cache block that the packet hit
+     * @param deferred_response Whether this hit is to a block that
+     *                          originally missed
+     * @param pending_downgrade Whether the writable flag is to be removed
+     *
+     * @note Dropping the block for clusivity is left to maintainClusivity
+     */
+    void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
+                        bool deferred_response = false,
+                        bool pending_downgrade = false);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
                                 bool already_copied, bool pending_inval);