From 7dde557fdc51140988092962137e1006d1609bea Mon Sep 17 00:00:00 2001
From: Ali Saidi <Ali.Saidi@ARM.com>
Date: Mon, 4 Apr 2011 11:42:23 -0500
Subject: [PATCH] O3: Tighten memory order violation checking to 16 bytes.

The comment in the code suggests that the checking granularity should be 16
bytes, however in reality the shift by 8 is 256 bytes which seems much
larger than required.
---
 src/cpu/base_dyn_inst.hh    |   5 ++
 src/cpu/o3/O3CPU.py         |   3 +
 src/cpu/o3/lsq_unit.hh      |  14 ++++
 src/cpu/o3/lsq_unit_impl.hh | 124 ++++++++++++++++--------------------
 4 files changed, 77 insertions(+), 69 deletions(-)
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 8b6662d70..a5357a7b0 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -243,6 +243,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The effective virtual address (lds & stores only). */
     Addr effAddr;
 
+    /** The size of the request */
+    Addr effSize;
+
     /** Is the effective virtual address valid. */
     bool effAddrValid;
 
@@ -892,6 +895,7 @@ BaseDynInst<Impl>::readBytes(Addr addr, uint8_t *data,
     if (translationCompleted) {
         if (fault == NoFault) {
             effAddr = req->getVaddr();
+            effSize = size;
             effAddrValid = true;
             fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
         } else {
@@ -962,6 +966,7 @@ BaseDynInst<Impl>::writeBytes(uint8_t *data, unsigned size,
 
     if (fault == NoFault && translationCompleted) {
         effAddr = req->getVaddr();
+        effSize = size;
         effAddrValid = true;
         fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
     }
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index f7602cb86..f379fcd8a 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -118,6 +118,9 @@ class DerivO3CPU(BaseCPU):
 
     LQEntries = Param.Unsigned(32, "Number of load queue entries")
     SQEntries = Param.Unsigned(32, "Number of store queue entries")
+    LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
+    LSQCheckLoads = Param.Bool(True,
+        "Should dependency violations be checked for loads & stores or just stores")
     LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
     SSITSize = Param.Unsigned(1024, "Store set ID table size")
 
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2bb42cadc..bdc524dec 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -111,6 +111,12 @@ class LSQUnit {
     /** Inserts a store instruction. */
     void insertStore(DynInstPtr &store_inst);
 
+    /** Check for ordering violations in the LSQ
+     * @param load_idx index to start checking at
+     * @param inst the instruction to check
+     */
+    Fault checkViolations(int load_idx, DynInstPtr &inst);
+
     /** Executes a load instruction. */
     Fault executeLoad(DynInstPtr &inst);
 
@@ -366,6 +372,14 @@ class LSQUnit {
      */
     unsigned SQEntries;
 
+    /** The number of places to shift addresses in the LSQ before checking
+     * for dependency violations
+     */
+    unsigned depCheckShift;
+
+    /** Should loads be checked for dependency issues */
+    bool checkLoads;
+
     /** The number of load instructions in the LQ. */
     int loads;
     /** The number of store instructions in the SQ. */
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 1a4e686a3..70b87ff26 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -162,6 +162,9 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
     loadQueue.resize(LQEntries);
     storeQueue.resize(SQEntries);
 
+    depCheckShift = params->LSQDepCheckShift;
+    checkLoads = params->LSQCheckLoads;
+
     loadHead = loadTail = 0;
 
     storeHead = storeWBIdx = storeTail = 0;
@@ -436,6 +439,55 @@ LSQUnit<Impl>::numLoadsReady()
     return retval;
 }
 
+template <class Impl>
+Fault
+LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
+{
+    Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
+    Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
+
+    /** @todo in theory you only need to check an instruction that has executed
+     * however, there isn't a good way in the pipeline at the moment to check
+     * all instructions that will execute before the store writes back. Thus,
+     * like the implementation that came before it, we're overly conservative.
+     */
+    while (load_idx != loadTail) {
+        DynInstPtr ld_inst = loadQueue[load_idx];
+        if (!ld_inst->effAddrValid || ld_inst->uncacheable()) {
+            incrLdIdx(load_idx);
+            continue;
+        }
+
+        Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
+        Addr ld_eff_addr2 =
+            (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
+
+        if ((inst_eff_addr2 > ld_eff_addr1 && inst_eff_addr1 < ld_eff_addr2) ||
+               inst_eff_addr1 == ld_eff_addr1) {
+            // A load/store incorrectly passed this load/store.
+            // Check if we already have a violator, or if it's newer
+            // squash and refetch.
+            if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
+                break;
+
+            DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and [sn:%lli]"
+                    " at address %#x\n", inst->seqNum, ld_inst->seqNum,
+                    ld_eff_addr1);
+            memDepViolator = ld_inst;
+
+            ++lsqMemOrderViolation;
+
+            return TheISA::genMachineCheckFault();
+        }
+
+        incrLdIdx(load_idx);
+    }
+    return NoFault;
+}
+
+
+
+
 template <class Impl>
 Fault
 LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
@@ -477,39 +529,9 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         assert(inst->effAddrValid);
         int load_idx = inst->lqIdx;
         incrLdIdx(load_idx);
-        while (load_idx != loadTail) {
-            // Really only need to check loads that have actually executed
 
-            // @todo: For now this is extra conservative, detecting a
-            // violation if the addresses match assuming all accesses
-            // are quad word accesses.
-
-            // @todo: Fix this, magic number being used here
-
-            // @todo: Uncachable load is not executed until it reaches
-            // the head of the ROB. Once this if checks only the executed
-            // loads(as noted above), this check can be removed
-            if (loadQueue[load_idx]->effAddrValid &&
-                ((loadQueue[load_idx]->effAddr >> 8)
-                 == (inst->effAddr >> 8)) &&
-                !loadQueue[load_idx]->uncacheable()) {
-                // A load incorrectly passed this load.  Squash and refetch.
-                // For now return a fault to show that it was unsuccessful.
-                DynInstPtr violator = loadQueue[load_idx];
-                if (!memDepViolator ||
-                    (violator->seqNum < memDepViolator->seqNum)) {
-                    memDepViolator = violator;
-                } else {
-                    break;
-                }
-
-                ++lsqMemOrderViolation;
-
-                return genMachineCheckFault();
-            }
-
-            incrLdIdx(load_idx);
-        }
+        if (checkLoads)
+            return checkViolations(load_idx, inst);
     }
 
     return load_fault;
@@ -564,44 +586,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
         ++storesToWB;
     }
 
-    assert(store_inst->effAddrValid);
-    while (load_idx != loadTail) {
-        // Really only need to check loads that have actually executed
-        // It's safe to check all loads because effAddr is set to
-        // InvalAddr when the dyn inst is created.
+    return checkViolations(load_idx, store_inst);
 
-        // @todo: For now this is extra conservative, detecting a
-        // violation if the addresses match assuming all accesses
-        // are quad word accesses.
-
-        // @todo: Fix this, magic number being used here
-
-        // @todo: Uncachable load is not executed until it reaches
-        // the head of the ROB. Once this if checks only the executed
-        // loads(as noted above), this check can be removed
-        if (loadQueue[load_idx]->effAddrValid &&
-            ((loadQueue[load_idx]->effAddr >> 8)
-             == (store_inst->effAddr >> 8)) &&
-            !loadQueue[load_idx]->uncacheable()) {
-            // A load incorrectly passed this store.  Squash and refetch.
-            // For now return a fault to show that it was unsuccessful.
-            DynInstPtr violator = loadQueue[load_idx];
-            if (!memDepViolator ||
-                (violator->seqNum < memDepViolator->seqNum)) {
-                memDepViolator = violator;
-            } else {
-                break;
-            }
-
-            ++lsqMemOrderViolation;
-
-            return genMachineCheckFault();
-        }
-
-        incrLdIdx(load_idx);
-    }
-
-    return store_fault;
 }
 
 template <class Impl>