Two fixes:

1. Requests are handled more carefully: the memory system is assumed to take ownership of a request once the access is sent out.
2. Load-load ordering is now enforced.

src/cpu/base_dyn_inst.hh:
    Update how requests are handled.  The BaseDynInst no longer holds a pointer to the request, because the memory system owns the request once it is sent out.

    Also add functions to clear certain status bits (CanIssue and Issued).
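
    As a rough illustration of the ownership rule above, here is a minimal
    standalone C++ sketch (Request, MemSystem, sendTiming and initiateAccess
    are stand-ins invented for the example, not the gem5 classes): the request
    is heap-allocated per access and freed on every path that does not hand it
    to the memory system.

        #include <cstdio>

        struct Request { unsigned vaddr; };     // stand-in for gem5's Request

        struct MemSystem {                      // stand-in for the cache/port
            bool blocked = false;
            // On success the memory system takes ownership of req and frees it
            // (here immediately, standing in for freeing it at completion);
            // on failure ownership stays with the caller.
            bool sendTiming(Request *req) {
                if (blocked)
                    return false;
                std::printf("memory system owns request for %#x\n", req->vaddr);
                delete req;
                return true;
            }
        };

        bool initiateAccess(MemSystem &mem, unsigned vaddr, bool translate_ok)
        {
            Request *req = new Request{vaddr};  // built fresh for this access
            if (!translate_ok) {                // fault path: never sent out,
                delete req;                     // so it must be freed here
                return false;
            }
            if (!mem.sendTiming(req)) {         // rejected: still owned here
                delete req;
                return false;
            }
            return true;                        // accepted: memory owns req
        }

        int main()
        {
            MemSystem mem;
            initiateAccess(mem, 0x1000, true);  // ownership handed off
            initiateAccess(mem, 0x2000, false); // faulted, freed locally
        }

    This mirrors the read()/write() hunks below, where the Request is now a
    local variable and is deleted on the paths that do not pass it on.
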
src/cpu/base_dyn_inst_impl.hh:
    Update how requests are handled.  The BaseDynInst no longer holds a pointer to the request, because the memory system owns the request once it is sent out.
src/cpu/o3/fetch_impl.hh:
    General correctness fixes.  retryPkt is not always set, so check it before using it.  Also treat the cache as unblocked only once recvRetry is called.
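
    The retryPkt fix boils down to guarding the delete; a tiny standalone
    sketch of the idea (Packet, Request and FetchRetryState are simplified
    stand-ins, not the real DefaultFetch members):

        struct Request {};
        struct Packet { Request *req = nullptr; };

        struct FetchRetryState {
            Packet *retryPkt = nullptr;
            int retryTid = -1;

            void squashThread(int tid) {
                if (retryTid != tid)
                    return;
                if (retryPkt) {         // not guaranteed to be set
                    delete retryPkt->req;
                    delete retryPkt;
                }
                retryPkt = nullptr;     // always clear the retry state
                retryTid = -1;
            }
        };
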
src/cpu/o3/lsq_unit.hh:
    Handle requests more carefully.  Now that the DynInst no longer holds a pointer to the request, delete the request whenever it is not handed off to the memory system.

    Also make sure loads never forward data from an uncacheable store.
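
    For the forwarding rule, a standalone sketch of the scan (SQEntry and
    findForwardingStore are invented for the example; the real code walks
    storeQueue entries inside LSQUnit<Impl>::read): uncacheable stores are
    skipped exactly like entries that have no data yet.

        #include <cstdint>
        #include <vector>

        struct SQEntry {
            uint64_t addr = 0;
            int size = 0;               // 0 means no data written yet
            bool uncacheable = false;
        };

        // Returns the index of a store the load may forward from, or -1.
        int findForwardingStore(const std::vector<SQEntry> &sq,
                                uint64_t load_addr, int load_size)
        {
            for (int i = static_cast<int>(sq.size()) - 1; i >= 0; --i) {
                const SQEntry &st = sq[i];
                if (st.size == 0)
                    continue;           // nothing to forward
                if (st.uncacheable)
                    continue;           // never forward from uncacheable stores
                bool has_lower = load_addr >= st.addr;
                bool has_upper = load_addr + load_size <= st.addr + st.size;
                if (has_lower && has_upper)
                    return i;           // store fully covers the load
            }
            return -1;
        }
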
src/cpu/o3/lsq_unit_impl.hh:
    Check that load-load ordering is maintained.

    Also handle requests more carefully, deleting them when they are not handed off to the memory system.
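
    The new load-load check walks the younger entries in the load queue and
    flags any executed load to the same block; a standalone sketch (LQEntry
    and checkLoadLoadOrder are invented names; the real code compares
    effAddr >> 8, i.e. 256-byte blocks, per the @todo in the diff):

        #include <cstdint>
        #include <vector>

        struct LQEntry {
            uint64_t effAddr = 0;
            bool effAddrValid = false;  // set once the load has executed
            uint64_t seqNum = 0;
        };

        // Returns the seqNum of the oldest younger load that already executed
        // to the same block, or 0 if load-load ordering was maintained.
        uint64_t checkLoadLoadOrder(const std::vector<LQEntry> &younger_loads,
                                    uint64_t exec_addr)
        {
            uint64_t violator_sn = 0;
            for (const LQEntry &ld : younger_loads) {
                if (!ld.effAddrValid)
                    continue;                       // hasn't executed yet
                if ((ld.effAddr >> 8) != (exec_addr >> 8))
                    continue;                       // different block
                // A younger load already got its value; it must be squashed
                // and re-executed to preserve load-load ordering.
                if (violator_sn == 0 || ld.seqNum < violator_sn)
                    violator_sn = ld.seqNum;
            }
            return violator_sn;
        }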

--HG--
extra : convert_revision : e86bead2886d02443cf77bf7a7a1492845e1690f
Kevin Lim, 2007-03-23 11:33:08 -04:00
commit 31e78b0b92, parent abb07d9da3
5 changed files with 149 additions and 57 deletions

src/cpu/base_dyn_inst.hh

@@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** The kind of fault this instruction has generated. */
Fault fault;
/** The memory request. */
Request *req;
/** Pointer to the data for the memory access. */
uint8_t *memData;
/** The effective virtual address (lds & stores only). */
Addr effAddr;
/** Is the effective virtual address valid. */
bool effAddrValid;
/** The effective physical address. */
Addr physEffAddr;
@@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Returns whether or not this instruction is ready to issue. */
bool readyToIssue() const { return status[CanIssue]; }
/** Clears this instruction being able to issue. */
void clearCanIssue() { status.reset(CanIssue); }
/** Sets this instruction as issued from the IQ. */
void setIssued() { status.set(Issued); }
/** Returns whether or not this instruction has issued. */
bool isIssued() const { return status[Issued]; }
/** Clears this instruction as being issued. */
void clearIssued() { status.reset(Issued); }
/** Sets this instruction as executed. */
void setExecuted() { status.set(Executed); }
@@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
bool eaCalcDone;
/** Is this instruction's memory access uncacheable. */
bool isUncacheable;
/** Has this instruction generated a memory request. */
bool reqMade;
public:
/** Sets the effective address. */
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
@@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Whether or not the memory operation is done. */
bool memOpDone;
/** Is this instruction's memory access uncacheable. */
bool uncacheable() { return isUncacheable; }
/** Has this instruction generated a memory request. */
bool hasRequest() { return reqMade; }
public:
/** Load queue index. */
int16_t lqIdx;
@@ -776,25 +794,25 @@ template<class T>
inline Fault
BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
{
// Sometimes reads will get retried, so they may come through here
// twice.
if (!req) {
req = new Request();
req->setVirt(asid, addr, sizeof(T), flags, this->PC);
req->setThreadContext(thread->readCpuId(), threadNumber);
} else {
assert(addr == req->getVaddr());
}
reqMade = true;
Request *req = new Request();
req->setVirt(asid, addr, sizeof(T), flags, this->PC);
req->setThreadContext(thread->readCpuId(), threadNumber);
if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
delete req;
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataReadReq(req, thread);
if (req->isUncacheable())
isUncacheable = true;
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
physEffAddr = req->getPaddr();
memReqFlags = req->getFlags();
@@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
this->setExecuted();
delete req;
}
if (traceData) {
@@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
traceData->setData(data);
}
assert(req == NULL);
req = new Request();
reqMade = true;
Request *req = new Request();
req->setVirt(asid, addr, sizeof(T), flags, this->PC);
req->setThreadContext(thread->readCpuId(), threadNumber);
if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
delete req;
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataWriteReq(req, thread);
if (req->isUncacheable())
isUncacheable = true;
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
physEffAddr = req->getPaddr();
memReqFlags = req->getFlags();
#if 0
@@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
#else
fault = cpu->write(req, data, sqIdx);
#endif
}
if (res) {
// always return some result to keep misspeculated paths
// (which will ignore faults) deterministic
*res = (fault == NoFault) ? req->getScResult() : 0;
} else {
delete req;
}
return fault;

src/cpu/base_dyn_inst_impl.hh

@@ -92,11 +92,13 @@ template <class Impl>
void
BaseDynInst<Impl>::initVars()
{
req = NULL;
memData = NULL;
effAddr = 0;
effAddrValid = false;
physEffAddr = 0;
isUncacheable = false;
reqMade = false;
readyRegs = 0;
instResult.integer = 0;
@@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars()
template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
if (req) {
delete req;
}
if (memData) {
delete [] memData;
}
@@ -271,7 +269,7 @@ void
BaseDynInst<Impl>::markSrcRegReady()
{
if (++readyRegs == numSrcRegs()) {
status.set(CanIssue);
setCanIssue();
}
}

src/cpu/o3/fetch_impl.hh

@@ -619,6 +619,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
fault = TheISA::genMachineCheckFault();
delete mem_req;
memReq[tid] = NULL;
warn("Bad address!\n");
}
assert(retryPkt == NULL);
assert(retryTid == -1);
@@ -669,11 +670,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
// Get rid of the retrying packet if it was from this thread.
if (retryTid == tid) {
assert(cacheBlocked);
cacheBlocked = false;
retryTid = -1;
delete retryPkt->req;
delete retryPkt;
if (retryPkt) {
delete retryPkt->req;
delete retryPkt;
}
retryPkt = NULL;
retryTid = -1;
}
fetchStatus[tid] = Squashing;
@@ -1152,7 +1154,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
///FIXME This needs to be more robust in dealing with delay slots
#if !ISA_HAS_DELAY_SLOT
predicted_branch |=
// predicted_branch |=
#endif
lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
predicted_branch |= (next_PC != fetch_NPC);
@@ -1223,7 +1225,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
assert(numInst != fetchWidth);
assert(numInst < fetchWidth);
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// We will use a nop in order to carry the fault.

src/cpu/o3/lsq_unit.hh

@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
(load_idx != loadHead || !load_inst->isAtCommit())) {
iewStage->rescheduleMemInst(load_inst);
++lsqRescheduledLoads;
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete req;
return TheISA::genMachineCheckFault();
}
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
if (store_size == 0)
continue;
else if (storeQueue[store_idx].inst->uncacheable())
continue;
assert(storeQueue[store_idx].inst->effAddrValid);
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
storeQueue[store_idx].inst->effAddr;
// If the store's data has all of the data needed, we can forward.
if (store_has_lower_limit && store_has_upper_limit) {
if ((store_has_lower_limit && store_has_upper_limit)) {
// Get shift amount for offset into the store's data.
int shift_amt = req->getVaddr() & (store_size - 1);
// @todo: Magic number, assumes byte addressing
@@ -595,6 +604,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If it's already been written back, then don't worry about
// stalling on it.
if (storeQueue[store_idx].completed) {
panic("Should not check one of these");
continue;
}
@@ -613,6 +623,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
iewStage->decrWb(load_inst->seqNum);
load_inst->clearIssued();
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
@@ -621,7 +632,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->getVaddr());
++lsqBlockedLoads;
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the
// proper place to really handle request deletes.
delete req;
return NoFault;
}
}
@@ -653,8 +668,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Delete state and data packet because a load retry
// initiates a pipeline restart; it does not retry.
delete state;
delete data_pkt->req;
delete data_pkt;
req = NULL;
if (result == Packet::BadAddress) {
return TheISA::genMachineCheckFault();
}
@@ -668,6 +686,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If the cache was blocked, or has become blocked due to the access,
// handle it.
if (lsq->cacheBlocked()) {
if (req)
delete req;
++lsqCacheBlocked;
iewStage->decrWb(load_inst->seqNum);

src/cpu/o3/lsq_unit_impl.hh

@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (isSwitchedOut() || inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
delete state;
delete pkt->req;
delete pkt;
return;
} else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
delete state;
delete pkt->req;
delete pkt;
}
@@ -403,12 +405,15 @@ template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
using namespace TheISA;
// Execute a specific load.
Fault load_fault = NoFault;
DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
assert(!inst->isSquashed());
load_fault = inst->initiateAcc();
// If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
if (!(inst->req && inst->req->isUncacheable()) ||
if (!(inst->hasRequest() && inst->uncacheable()) ||
inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
} else if (!loadBlocked()) {
assert(inst->effAddrValid);
int load_idx = inst->lqIdx;
incrLdIdx(load_idx);
while (load_idx != loadTail) {
// Really only need to check loads that have actually executed
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// @todo: Fix this, magic number being used here
if (loadQueue[load_idx]->effAddrValid &&
(loadQueue[load_idx]->effAddr >> 8) ==
(inst->effAddr >> 8)) {
// A load incorrectly passed this load. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
DynInstPtr violator = loadQueue[load_idx];
if (!memDepViolator ||
(violator->seqNum < memDepViolator->seqNum)) {
memDepViolator = violator;
} else {
break;
}
++lsqMemOrderViolation;
return genMachineCheckFault();
}
incrLdIdx(load_idx);
}
}
return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
store_inst->readPC(), store_inst->seqNum);
assert(!store_inst->isSquashed());
// Check the recently completed loads to see if any match this store's
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
++storesToWB;
}
if (!memDepViolator) {
while (load_idx != loadTail) {
// Really only need to check loads that have actually executed
// It's safe to check all loads because effAddr is set to
// InvalAddr when the dyn inst is created.
assert(store_inst->effAddrValid);
while (load_idx != loadTail) {
// Really only need to check loads that have actually executed
// It's safe to check all loads because effAddr is set to
// InvalAddr when the dyn inst is created.
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// @todo: For now this is extra conservative, detecting a
// violation if the addresses match assuming all accesses
// are quad word accesses.
// @todo: Fix this, magic number being used here
if ((loadQueue[load_idx]->effAddr >> 8) ==
(store_inst->effAddr >> 8)) {
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
++lsqMemOrderViolation;
return genMachineCheckFault();
// @todo: Fix this, magic number being used here
if (loadQueue[load_idx]->effAddrValid &&
(loadQueue[load_idx]->effAddr >> 8) ==
(store_inst->effAddr >> 8)) {
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
DynInstPtr violator = loadQueue[load_idx];
if (!memDepViolator ||
(violator->seqNum < memDepViolator->seqNum)) {
memDepViolator = violator;
} else {
break;
}
incrLdIdx(load_idx);
++lsqMemOrderViolation;
return genMachineCheckFault();
}
// If we've reached this point, there was no violation.
memDepViolator = NULL;
incrLdIdx(load_idx);
}
return store_fault;
@@ -659,7 +702,7 @@ LSQUnit<Impl>::writebackStores()
panic("LSQ sent out a bad address for a completed store!");
}
// Need to handle becoming blocked on a store.
DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
"retry later\n",
inst->seqNum);
isStoreBlocked = true;
@@ -734,6 +777,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
memDepViolator = NULL;
}
int store_idx = storeTail;
decrStIdx(store_idx);
@@ -763,6 +810,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete storeQueue[store_idx].req;
storeQueue[store_idx].req = NULL;
--stores;