Getting closer...

configs/example/memtest.py: Add progress interval option. src/base/traceflags.py: Add MemTest flag. src/cpu/memtest/memtest.cc: Clean up tracing. src/cpu/memtest/memtest.hh: Get rid of unused code. --HG-- extra : convert_revision : 92bd8241a6c90bfb6d908e5a5132cbdb500cbb87
2007-06-21 11:59:17 -07:00 · 2007-06-21 11:59:17 -07:00 · 83af0fdcf5
commit 83af0fdcf5
parent d69a763833
16 changed files with 447 additions and 510 deletions
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@ -60,6 +60,11 @@ parser.add_option("-u", "--uncacheable", type="int", default=0,
                  help="Target percentage of uncacheable accesses "
                  "[default: %default]")

+parser.add_option("--progress", type="int", default=1000,
+                  metavar="NLOADS",
+                  help="Progress message interval "
+                  "[default: %default]")
+
 (options, args) = parser.parse_args()

 if args:
@ -112,7 +117,7 @@ if options.numtesters > block_size:
 cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
                 percent_functional=options.functional,
                 percent_uncacheable=options.uncacheable,
-                 progress_interval=1000)
+                 progress_interval=options.progress)
         for i in xrange(options.numtesters) ]

 # system simulated
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@ -128,6 +128,7 @@ baseFlags = [
    'Mbox',
    'MemDepUnit',
    'MemoryAccess',
+    'MemTest',
    'O3CPU',
    'OzoneCPU',
    'OzoneLSQ',
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@ -191,29 +191,25 @@ MemTest::init()
    // memory should be 0; no need to initialize them.
 }

-static void
-printData(ostream &os, uint8_t *data, int nbytes)
-{
-    os << hex << setfill('0');
-    // assume little-endian: print bytes from highest address to lowest
-    for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) {
-        os << setw(2) << (unsigned)*dp;
-    }
-    os << dec;
-}

 void
 MemTest::completeRequest(PacketPtr pkt)
 {
+    Request *req = pkt->req;
+
+    DPRINTF(MemTest, "completing %s at address %x (blk %x)\n",
+            pkt->isWrite() ? "write" : "read",
+            req->getPaddr(), blockAddr(req->getPaddr()));
+
    MemTestSenderState *state =
        dynamic_cast<MemTestSenderState *>(pkt->senderState);

    uint8_t *data = state->data;
    uint8_t *pkt_data = pkt->getPtr<uint8_t>();
-    Request *req = pkt->req;

    //Remove the address from the list of outstanding
-    std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr());
+    std::set<unsigned>::iterator removeAddr =
+        outstandingAddrs.find(req->getPaddr());
    assert(removeAddr != outstandingAddrs.end());
    outstandingAddrs.erase(removeAddr);

@ -237,39 +233,17 @@ MemTest::completeRequest(PacketPtr pkt)
        }

        if (numReads >= maxLoads)
-            exitSimLoop("Maximum number of loads reached!");
+            exitSimLoop("maximum number of loads reached");
        break;

      case MemCmd::WriteResp:
        numWritesStat++;
        break;
-/*
-      case Copy:
-        //Also remove dest from outstanding list
-        removeAddr = outstandingAddrs.find(req->dest);
-        assert(removeAddr != outstandingAddrs.end());
-        outstandingAddrs.erase(removeAddr);
-        numCopiesStat++;
-        break;
-*/
+
      default:
        panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt());
    }

-    if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-        cerr << name() << ": completed "
-             << (pkt->isWrite() ? "write" : "read")
-             << " access of "
-             << dec << pkt->getSize() << " bytes at address 0x"
-             << hex << req->getPaddr()
-             << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-             << ", value = 0x";
-        printData(cerr, pkt_data, pkt->getSize());
-        cerr << " @ cycle " << dec << curTick;
-
-        cerr << endl;
-    }
-
    noResponseCycles = 0;
    delete state;
    delete [] data;
@ -325,7 +299,7 @@ MemTest::tick()
    //mem tester
    //We can eliminate the lower bits of the offset, and then use the id
    //to offset within the blks
-    offset &= ~63; //Not the low order bits
+    offset = blockAddr(offset);
    offset += id;
    access_size = 0;

@ -351,29 +325,23 @@ MemTest::tick()
    if (cmd < percentReads) {
        // read

-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
        if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
            delete [] result;
            delete req;
            return;
        }
-        else outstandingAddrs.insert(paddr);
+
+        outstandingAddrs.insert(paddr);

        // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin
        funcPort.readBlob(req->getPaddr(), result, req->getSize());

-        if (blockAddr(paddr) == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating read "
-                 << ((probe) ? "probe of " : "access of ")
-                 << dec << req->getSize() << " bytes from addr 0x"
-                 << hex << paddr
-                 << " (0x" << hex << blockAddr(paddr) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
+        DPRINTF(MemTest,
+                "initiating read at address %x (blk %x) expecting %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), *result);

        PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
        pkt->dataDynamicArray(new uint8_t[req->getSize()]);
@ -385,36 +353,25 @@ MemTest::tick()
            pkt->makeAtomicResponse();
            completeRequest(pkt);
        } else {
-//	    req->completionEvent = new MemCompleteEvent(req, result, this);
            sendPkt(pkt);
        }
    } else {
        // write

-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
        if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
            delete [] result;
            delete req;
            return;
        }

-        else outstandingAddrs.insert(paddr);
+        outstandingAddrs.insert(paddr);
+
+        DPRINTF(MemTest, "initiating write at address %x (blk %x) value %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff);

-/*
-        if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-            cerr << name() << ": initiating write "
-                 << ((probe)?"probe of ":"access of ")
-                 << dec << req->getSize() << " bytes (value = 0x";
-            printData(cerr, data_pkt->getPtr(), req->getSize());
-            cerr << ") to addr 0x"
-                 << hex << req->getPaddr()
-                 << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
-*/
        PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
        uint8_t *pkt_data = new uint8_t[req->getSize()];
        pkt->dataDynamicArray(pkt_data);
@ -429,54 +386,9 @@ MemTest::tick()
            pkt->makeAtomicResponse();
            completeRequest(pkt);
        } else {
-//	    req->completionEvent = new MemCompleteEvent(req, NULL, this);
            sendPkt(pkt);
        }
    }
-/*    else {
-        // copy
-        unsigned source_align = random() % 100;
-        unsigned dest_align = random() % 100;
-        unsigned offset2 = random() % size;
-
-        Addr source = ((base) ? baseAddr1 : baseAddr2) + offset;
-        Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2;
-        if (outstandingAddrs.find(source) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(source);
-        if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(dest);
-
-        if (source_align >= percentSourceUnaligned) {
-            source = blockAddr(source);
-        }
-        if (dest_align >= percentDestUnaligned) {
-            dest = blockAddr(dest);
-        }
-        req->cmd = Copy;
-        req->flags &= ~UNCACHEABLE;
-        req->paddr = source;
-        req->dest = dest;
-        delete [] req->data;
-        req->data = new uint8_t[blockSize];
-        req->size = blockSize;
-        if (source == traceBlockAddr || dest == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating copy of "
-                 << dec << req->size << " bytes from addr 0x"
-                 << hex << source
-                 << " (0x" << hex << blockAddr(source) << ")"
-                 << " to addr 0x"
-                 << hex << dest
-                 << " (0x" << hex << blockAddr(dest) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }*
-        cacheInterface->access(req);
-        uint8_t result[blockSize];
-        checkMem->access(Read, source, &result, blockSize);
-        checkMem->access(Write, dest, &result, blockSize);
-    }
-*/
 }

 void
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@ -35,8 +35,6 @@
 #include <set>

 #include "base/statistics.hh"
-//#include "mem/functional/functional.hh"
-//#include "mem/mem_interface.hh"
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 #include "sim/sim_object.hh"
@ -50,9 +48,6 @@ class MemTest : public MemObject
  public:

    MemTest(const std::string &name,
-//	    MemInterface *_cache_interface,
-//	    PhysicalMemory *main_mem,
-//	    PhysicalMemory *check_mem,
            unsigned _memorySize,
            unsigned _percentReads,
            unsigned _percentFunctional,
@ -136,12 +131,7 @@ class MemTest : public MemObject
        uint8_t *data;
    };

-//    Request *dataReq;
    PacketPtr retryPkt;
-//    MemInterface *cacheInterface;
-//    PhysicalMemory *mainMem;
-//    PhysicalMemory *checkMem;
-//    SimpleThread *thread;

    bool accessRetry;

--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@ -50,8 +50,9 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)

 BaseCache::BaseCache(const std::string &name, Params &params)
    : MemObject(name),
-      mshrQueue(params.numMSHRs, 4),
-      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
+      mshrQueue(params.numMSHRs, 4, MSHRQueue_MSHRs),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
+                  MSHRQueue_WriteBuffer),
      blkSize(params.blkSize),
      numTarget(params.numTargets),
      blocked(0),
@ -128,6 +129,7 @@ BaseCache::init()
    cpuSidePort->sendStatusChange(Port::RangeChange);
 }

+
 void
 BaseCache::regStats()
 {
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@ -54,41 +54,49 @@
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"

-/**
- * Reasons for Caches to be Blocked.
- */
-enum BlockedCause{
-    Blocked_NoMSHRs,
-    Blocked_NoTargets,
-    Blocked_NoWBBuffers,
-    Blocked_Coherence,
-    NUM_BLOCKED_CAUSES
-};
-
-/**
- * Reasons for cache to request a bus.
- */
-enum RequestCause{
-    Request_MSHR,
-    Request_WB,
-    Request_Coherence,
-    Request_PF
-};
-
 class MSHR;
 /**
 * A basic cache interface. Implements some common functions for speed.
 */
 class BaseCache : public MemObject
 {
+    /**
+     * Indexes to enumerate the MSHR queues.
+     */
+    enum MSHRQueueIndex {
+        MSHRQueue_MSHRs,
+        MSHRQueue_WriteBuffer
+    };
+
+    /**
+     * Reasons for caches to be blocked.
+     */
+    enum BlockedCause {
+        Blocked_NoMSHRs = MSHRQueue_MSHRs,
+        Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
+        Blocked_NoTargets,
+        NUM_BLOCKED_CAUSES
+    };
+
+  public:
+    /**
+     * Reasons for cache to request a bus.
+     */
+    enum RequestCause {
+        Request_MSHR = MSHRQueue_MSHRs,
+        Request_WB = MSHRQueue_WriteBuffer,
+        Request_PF,
+        NUM_REQUEST_CAUSES
+    };
+
+  private:
+
    class CachePort : public SimpleTimingPort
    {
      public:
        BaseCache *cache;

      protected:
-        Event *responseEvent;
-
        CachePort(const std::string &_name, BaseCache *_cache);

        virtual void recvStatusChange(Status status);
@ -154,6 +162,36 @@ class BaseCache : public MemObject
    /** Write/writeback buffer */
    MSHRQueue writeBuffer;

+    MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
+                                 PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHR *mshr = mq->allocate(addr, size, pkt);
+        mshr->order = order++;
+
+        if (mq->isFull()) {
+            setBlocked((BlockedCause)mq->index);
+        }
+
+        if (requestBus) {
+            requestMemSideBus((RequestCause)mq->index, time);
+        }
+
+        return mshr;
+    }
+
+    void markInServiceInternal(MSHR *mshr)
+    {
+        MSHRQueue *mq = mshr->queue;
+        bool wasFull = mq->isFull();
+        mq->markInService(mshr);
+        if (!mq->havePending()) {
+            deassertMemSideBusRequest((RequestCause)mq->index);
+        }
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
+    }
+
    /** Block size of this cache */
    const int blkSize;

@ -382,6 +420,31 @@ class BaseCache : public MemObject
    Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }


+    MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        return allocateBufferInternal(&mshrQueue,
+                                      blockAlign(pkt->getAddr()), blkSize,
+                                      pkt, time, requestBus);
+    }
+
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {
+        MSHRQueue *mq = NULL;
+
+        if (pkt->isWrite() && !pkt->isRead()) {
+            /**
+             * @todo Add write merging here.
+             */
+            mq = &writeBuffer;
+        } else {
+            mq = &mshrQueue;
+        }
+
+        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
+
+
    /**
     * Returns true if the cache is blocked for accesses.
     */
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@ -179,7 +179,7 @@ class Cache : public BaseCache
     * @return Pointer to the cache block touched by the request. NULL if it
     * was a miss.
     */
-    bool access(PacketPtr pkt, BlkType *blk, int & lat);
+    bool access(PacketPtr pkt, BlkType *&blk, int &lat);

    /**
     *Handle doing the Compare and Swap function for SPARC.
@ -201,7 +201,7 @@ class Cache : public BaseCache

    bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
    bool satisfyTarget(MSHR::Target *target, BlkType *blk);
-    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+    bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);

    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);

@ -310,15 +310,16 @@ class Cache : public BaseCache
     * @param isFill Whether to fetch & allocate a block
     *               or just forward the request.
     */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
-                         bool requestBus);
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);

    /**
     * Selects a outstanding request to service.
     * @return The request to service, NULL if none found.
     */
+    PacketPtr getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                           bool needsExclusive);
    MSHR *getNextMSHR();
-    PacketPtr getPacket();
+    PacketPtr getTimingPacket();

    /**
     * Marks a request as in service (sent on the bus). This can have side
@ -328,13 +329,6 @@ class Cache : public BaseCache
     */
    void markInService(MSHR *mshr);

-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr pkt, Tick time);
-
    /**
     * Perform the given writeback request.
     * @param pkt The writeback request.
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@ -152,40 +152,21 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 template<class TagStore, class Coherence>
 MSHR *
 Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool isFill, bool requestBus)
+                                          bool requestBus)
 {
-    int  size = isFill ? blkSize : pkt->getSize();
-    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+    MSHRQueue *mq = NULL;

-    MSHR *mshr = NULL;
-
-    if (pkt->isWrite()) {
+    if (pkt->isWrite() && !pkt->isRead()) {
        /**
         * @todo Add write merging here.
         */
-        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-
-        if (writeBuffer.isFull()) {
-            setBlocked(Blocked_NoWBBuffers);
-        }
-
-        if (requestBus) {
-            requestMemSideBus(Request_WB, time);
-        }
+        mq = &writeBuffer;
    } else {
-        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-        if (mshrQueue.isFull()) {
-            setBlocked(Blocked_NoMSHRs);
-        }
-        if (requestBus) {
-            requestMemSideBus(Request_MSHR, time);
-        }
+        mq = &mshrQueue;
    }

-    assert(mshr != NULL);
-    return mshr;
+    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                  pkt, time, requestBus);
 }


@ -193,33 +174,7 @@ template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 {
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if (mshr->queue == &writeBuffer) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = writeBuffer.isFull();
-        writeBuffer.markInService(mshr);
-        if (!writeBuffer.havePending()){
-            deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !writeBuffer.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        assert(mshr->queue == &mshrQueue);
-        unblock = mshrQueue.isFull();
-        mshrQueue.markInService(mshr);
-        if (!mshrQueue.havePending()){
-            deassertMemSideBusRequest(Request_MSHR);
-        }
+    markInServiceInternal(mshr);
 #if 0
        if (mshr->originalCmd == MemCmd::HardPFReq) {
            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@ -231,14 +186,6 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
            }
        }
 #endif
-        if (unblock) {
-            unblock = !mshrQueue.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        clearBlocked(cause);
-    }
 }


@ -275,9 +222,16 @@ Cache<TagStore,Coherence>::squash(int threadNum)

 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
+    if (pkt->req->isUncacheable())  {
+        blk = NULL;
+        lat = hitLatency;
+        return false;
+    }
+
    bool satisfied = false;  // assume the worst
+    blk = tags->findBlock(pkt->getAddr(), lat);

    if (prefetchAccess) {
        //We are determining prefetches on access stream, call prefetcher
@ -307,6 +261,8 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
            satisfied = true;

+            // Check RMW operations first since both isRead() and
+            // isWrite() will be true for them
            if (pkt->cmd == MemCmd::SwapReq) {
                cmpAndSwap(blk, pkt);
            } else if (pkt->isWrite()) {
@ -314,12 +270,16 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
                    blk->status |= BlkDirty;
                    pkt->writeDataToBlock(blk->data, blkSize);
                }
-            } else {
-                assert(pkt->isRead());
+            } else if (pkt->isRead()) {
                if (pkt->isLocked()) {
                    blk->trackLoadLocked(pkt);
                }
                pkt->setDataFromBlock(blk->data, blkSize);
+            } else {
+                // Not a read or write... must be an upgrade.  It's OK
+                // to just ack those as long as we have an exclusive
+                // copy at this level.
+                assert(pkt->cmd == MemCmd::UpgradeReq);
            }
        } else {
            // permission violation... nothing to do here, leave unsatisfied
@ -351,19 +311,24 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
    // we charge hitLatency for doing just about anything here
    Tick time =  curTick + hitLatency;

+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return true;
+    }
+
    if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, false, true);
+        allocateBuffer(pkt, time, true);
        assert(pkt->needsResponse()); // else we should delete it here??
        return true;
    }

    PacketList writebacks;
    int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
    bool satisfied = false;

    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
    MSHR *mshr = mshrQueue.findMatch(blk_addr);

    if (!mshr) {
@ -373,6 +338,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
        // cache block... a more aggressive system could detect the
        // overlap (if any) and forward data out of the MSHRs, but we
        // don't do that yet)
+        BlkType *blk = NULL;
        satisfied = access(pkt, blk, lat);
    }

@ -401,7 +367,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
    // copy writebacks to write buffer
    while (!writebacks.empty()) {
        PacketPtr wbPkt = writebacks.front();
-        allocateBuffer(wbPkt, time, false, true);
+        allocateBuffer(wbPkt, time, true);
        writebacks.pop_front();
    }

@ -435,7 +401,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
            // always mark as cache fill for now... if we implement
            // no-write-allocate or bypass accesses this will have to
            // be changed.
-            allocateBuffer(pkt, time, true, true);
+            allocateMissBuffer(pkt, time, true);
        }
    }

@ -449,45 +415,96 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }


+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                                        bool needsExclusive)
+{
+    bool blkValid = blk && blk->isValid();
+
+    if (cpu_pkt->req->isUncacheable()) {
+        assert(blk == NULL);
+        return NULL;
+    }
+
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback ||
+         cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+            // For now, writebacks and upgrades from upper-level caches
+            // that completely miss in this cache just go through. If we
+            // had "fast write" support (where we could write the whole
+            // block w/o fetching new data) we might want to allocate
+            // on writeback misses instead.
+        return NULL;
+    }
+
+    MemCmd cmd;
+    const bool useUpgrades = true;
+    if (blkValid && useUpgrades) {
+        // only reason to be here is that blk is shared
+        // (read-only) and we need exclusive
+        assert(needsExclusive && !blk->isWritable());
+        cmd = MemCmd::UpgradeReq;
+    } else {
+        // block is invalid
+        cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    }
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
+
+    pkt->allocate();
+    return pkt;
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
+    int lat = hitLatency;
+
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return lat;
+    }
+
    // should assert here that there are no outstanding MSHRs or
    // writebacks... that would mean that someone used an atomic
    // access in timing mode

-    if (pkt->req->isUncacheable()) {
-        // Uncacheables just go through
-        return memSidePort->sendAtomic(pkt);
+    BlkType *blk = NULL;
+
+    if (!access(pkt, blk, lat)) {
+        // MISS
+        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
+
+        bool isCacheFill = (busPkt != NULL);
+
+        if (busPkt == NULL) {
+            // just forwarding the same request to the next level;
+            // no local cache operation involved
+            busPkt = pkt;
        }

-    PacketList writebacks;
-    int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
-    bool satisfied = access(pkt, blk, lat);
+        DPRINTF(Cache, "Sending an atomic %s for %x\n",
+                busPkt->cmdString(), busPkt->getAddr());

-    if (!satisfied) {
-        // MISS
-        CacheBlk::State old_state = (blk) ? blk->status : 0;
-        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
-        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
-        busPkt.allocate();
+#if TRACING_ON
+        CacheBlk::State old_state = blk ? blk->status : 0;
+#endif

-        DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                busPkt.cmdString(), busPkt.getAddr());
-
-        lat += memSidePort->sendAtomic(&busPkt);
+        lat += memSidePort->sendAtomic(busPkt);

        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                busPkt.cmdString(), busPkt.getAddr(), old_state);
+                busPkt->cmdString(), busPkt->getAddr(), old_state);

-        blk = handleFill(&busPkt, blk, writebacks);
+        if (isCacheFill) {
+            PacketList writebacks;
+            blk = handleFill(busPkt, blk, writebacks);
            bool status = satisfyCpuSideRequest(pkt, blk);
            assert(status);
-    }
-
-    // We now have the block one way or another (hit or completed miss)
+            delete busPkt;

            // Handle writebacks if needed
            while (!writebacks.empty()){
@ -496,6 +513,10 @@ Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
                writebacks.pop_front();
                delete wbPkt;
            }
+        }
+    }
+
+    // We now have the block one way or another (hit or completed miss)

    if (pkt->needsResponse()) {
        pkt->makeAtomicResponse();
@ -553,211 +574,6 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 //
 /////////////////////////////////////////////////////

-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
-{
-    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
-
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            mshrQueue.markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->cpuSide);
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr<uint8_t>());
-            }
-            cpuSidePort->respond(target->pkt, time);
-            assert(!mshr->hasTargets());
-        }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(!mshr->isCacheFill);
-            while (mshr->hasTargets()) {
-                MSHR::Target *target = mshr->getTarget();
-                assert(target->isCpuSide());
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    target->pkt->setData(pkt->getPtr<uint8_t>());
-                }
-                cpuSidePort->respond(target->pkt, time);
-            }
-        }
-
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = writeBuffer.isFull();
-            writeBuffer.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !writeBuffer.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    }
-    if (unblock || unblock_target) {
-        clearBlocked(cause);
-    }
-}
-
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
-{
-    Tick time = curTick + hitLatency;
-    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
-    assert(mshr);
-    if (pkt->result == Packet::Nacked) {
-        //pkt->reinitFromRequest();
-        warn("NACKs from devices not connected to the same bus "
-             "not implemented\n");
-        return;
-    }
-    assert(pkt->result != Packet::BadAddress);
-    assert(pkt->result == Packet::Success);
-    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
-
-    if (mshr->isCacheFill) {
-        DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
-                pkt->getAddr());
-        BlkType *blk = tags->findBlock(pkt->getAddr());
-        PacketList writebacks;
-        blk = handleFill(pkt, blk, writebacks);
-        satisfyMSHR(mshr, pkt, blk);
-        // copy writebacks to write buffer
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, false, true);
-            writebacks.pop_front();
-        }
-    }
-    handleResponse(pkt, time);
-}
-
-
-
-
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
-{
-    assert(blk && blk->isValid() && blk->isDirty());
-
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    Request *writebackReq =
-        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
-    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
-    writeback->allocate();
-    std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
-
-    blk->status &= ~BlkDirty;
-    return writeback;
-}
-
-
-// Note that the reason we return a list of writebacks rather than
-// inserting them directly in the write buffer is that this function
-// is called by both atomic and timing-mode accesses, and in atomic
-// mode we don't mess with the write buffer (we just perform the
-// writebacks atomically once the original request is complete).
-template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
-                                      PacketList &writebacks)
-{
-    Addr addr = pkt->getAddr();
-
-    if (blk == NULL) {
-
-        // need to do a replacement
-        blk = tags->findReplacement(addr, writebacks);
-        if (blk->isValid()) {
-            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
-                    blk->isDirty() ? "writeback" : "clean");
-
-            if (blk->isDirty()) {
-                // Save writeback packet for handling by caller
-                writebacks.push_back(writebackBlk(blk));
-            }
-        }
-
-        blk->tag = tags->extractTag(addr);
-        blk->status = coherence->getNewState(pkt);
-        assert(pkt->isRead());
-    } else {
-        // existing block... probably an upgrade
-        assert(blk->tag == tags->extractTag(addr));
-        // either we're getting new data or the block should already be valid
-        assert(pkt->isRead() || blk->isValid());
-        CacheBlk::State old_state = blk->status;
-        blk->status = coherence->getNewState(pkt, old_state);
-        if (blk->status != old_state)
-            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
-                    addr, old_state, blk->status);
-        else
-            warn("Changing state to same value\n");
-    }
-
-    // if we got new data, copy it in
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
-    }
-
-    blk->whenReady = pkt->finishTime;
-
-    return blk;
-}
-

 template<class TagStore, class Coherence>
 bool
@ -798,7 +614,7 @@ Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
 }

 template<class TagStore, class Coherence>
-void
+bool
 Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
                                       BlkType *blk)
 {
@ -818,7 +634,11 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
            // Invalid access, need to do another request
            // can occur if block is invalidated, or not correct
            // permissions
-            break;
+            MSHRQueue *mq = mshr->queue;
+            mq->markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return false;
        }


@ -840,6 +660,159 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
        cpuSidePort->respond(target->pkt, completion_time);
        mshr->popTarget();
    }
+
+    return true;
+}
+
+// Handle a response packet whose senderState identifies the MSHR it answers.
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
+{
+    Tick time = curTick + hitLatency;  // passed to respond()/allocateBuffer()
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+    assert(mshr);  // a response without an MSHR cannot be handled
+    if (pkt->result == Packet::Nacked) {
+        //pkt->reinitFromRequest();
+        warn("NACKs from devices not connected to the same bus "
+             "not implemented\n");
+        return;  // NACK is only reported; the MSHR is left untouched
+    }
+    assert(pkt->result != Packet::BadAddress);
+    assert(pkt->result == Packet::Success);
+    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
+    // NOTE(review): "reponse" in the trace string above is a typo.
+    MSHRQueue *mq = mshr->queue;
+    bool wasFull = mq->isFull();  // sampled before any deallocation below
+
+    if (mshr == noTargetMSHR) {
+        // we always clear at least one target
+        clearBlocked(Blocked_NoTargets);
+        noTargetMSHR = NULL;
+    }
+
+    // Can we deallocate MSHR when done?
+    bool deallocate = false;
+    // Fill responses: keep the MSHR if satisfyMSHR() re-marked it pending.
+    if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
+        DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
+                pkt->getAddr());
+        BlkType *blk = tags->findBlock(pkt->getAddr());
+        PacketList writebacks;
+        blk = handleFill(pkt, blk, writebacks);
+        deallocate = satisfyMSHR(mshr, pkt, blk);
+        // copy writebacks to write buffer
+        while (!writebacks.empty()) {
+            PacketPtr wbPkt = writebacks.front();
+            allocateBuffer(wbPkt, time, true);
+            writebacks.pop_front();
+        }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+        // Not a cache fill: answer each queued target, copying read data.
+        while (mshr->hasTargets()) {
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->isCpuSide());
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+        }
+        assert(!mshr->hasTargets());
+        deallocate = true;
+    }
+    // Free the MSHR; if a previously full queue now has room, unblock it.
+    if (deallocate) {
+        mq->deallocate(mshr);
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
+    }
+}
+
+
+// Build a Writeback packet holding a copy of blk's data and clear the
+// block's dirty bit.  The packet and its Request are allocated with new.
+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
+{
+    assert(blk && blk->isValid() && blk->isDirty());
+    // counted under index 0 (the thread-number lookup is commented out)
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
+
+    Request *writebackReq =
+        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
+    PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
+    writeback->allocate();
+    std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
+    // the data now lives in the packet, so drop the block's dirty bit
+    blk->status &= ~BlkDirty;
+    return writeback;
+}
+
+
+// Fill 'blk' from 'pkt', first getting a replacement block from the tags if
+// 'blk' is NULL, and return it.  Writebacks are handed back in a list rather
+// than inserted directly in the write buffer because this function is called
+// by both atomic and timing-mode accesses, and in atomic mode the writebacks
+// are just performed atomically once the original request is complete.
+template<class TagStore, class Coherence>
+typename Cache<TagStore,Coherence>::BlkType*
+Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
+                                      PacketList &writebacks)
+{
+    Addr addr = pkt->getAddr();
+
+    if (blk == NULL) {
+        // no existing block, so the packet must be a read carrying fill data
+        assert(pkt->isRead());
+
+        // need to do a replacement
+        blk = tags->findReplacement(addr, writebacks);
+        if (blk->isValid()) {
+            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
+                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
+                    blk->isDirty() ? "writeback" : "clean");
+
+            if (blk->isDirty()) {
+                // Save writeback packet for handling by caller
+                writebacks.push_back(writebackBlk(blk));
+            }
+        }
+        // retag the block for addr and take the protocol's initial state
+        blk->tag = tags->extractTag(addr);
+        blk->status = coherence->getNewState(pkt);
+    } else {
+        // existing block... probably an upgrade
+        assert(blk->tag == tags->extractTag(addr));
+        // either we're getting new data or the block should already be valid
+        assert(pkt->isRead() || blk->isValid());
+        CacheBlk::State old_state = blk->status;
+        blk->status = coherence->getNewState(pkt, old_state);
+        if (blk->status != old_state)
+            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+                    addr, old_state, blk->status);
+        else
+            warn("Changing state to same value\n");
+    }
+
+    // if we got new data, copy it in
+    if (pkt->isRead()) {
+        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    }
+    // the block's ready time is taken from the packet's finish time
+    blk->whenReady = pkt->finishTime;
+
+    return blk;
 }


@ -1052,7 +1025,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
            // (hwpf_mshr_misses)
            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
            // Don't request bus, since we already have it
-            return allocateBuffer(pkt, curTick, true, false);
+            return allocateMissBuffer(pkt, curTick, false);
        }
    }

@ -1062,7 +1035,7 @@ Cache<TagStore,Coherence>::getNextMSHR()

 template<class TagStore, class Coherence>
 PacketPtr
-Cache<TagStore,Coherence>::getPacket()
+Cache<TagStore,Coherence>::getTimingPacket()
 {
    MSHR *mshr = getNextMSHR();

@ -1073,30 +1046,21 @@ Cache<TagStore,Coherence>::getPacket()
    BlkType *blk = tags->findBlock(mshr->addr);

    // use request from 1st target
-    MSHR::Target *tgt1 = mshr->getTarget();
-    PacketPtr tgt1_pkt = tgt1->pkt;
-    PacketPtr pkt;
+    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
+    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);

-    if (mshr->isCacheFill) {
-        MemCmd cmd;
-        if (blk && blk->isValid()) {
-            // only reason to be here is that blk is shared
-            // (read-only) and we need exclusive
-            assert(mshr->needsExclusive && !blk->isWritable());
-            cmd = MemCmd::UpgradeReq;
-        } else {
-            // block is invalid
-            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    mshr->isCacheFill = (pkt != NULL);
+
+    if (pkt == NULL) {
+        // make copy of current packet to forward
+        pkt = new Packet(tgt_pkt);
+        pkt->allocate();
+        if (pkt->isWrite()) {
+            pkt->setData(tgt_pkt->getPtr<uint8_t>());
        }
-        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
-    } else {
-        assert(blk == NULL);
-        assert(mshr->getNumTargets() == 1);
-        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
    }

    pkt->senderState = mshr;
-    pkt->allocate();
    return pkt;
 }

@ -1243,7 +1207,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
        waitingOnRetry = !success;
    } else {
        // check for non-response packets (requests & writebacks)
-        PacketPtr pkt = myCache()->getPacket();
+        PacketPtr pkt = myCache()->getTimingPacket();
        MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);

        bool success = sendTiming(pkt);
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@ -259,7 +259,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
    MC::Command writeToSharedCmd =
        doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
    MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExResp;
+        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;

    // Note that all transitions by default cause a panic.
    // Override the valid transitions with the appropriate actions here.
@ -272,6 +272,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
    tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
    tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
    tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
    tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
    tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
    tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@ -54,12 +54,12 @@ MSHR::MSHR()
 }

 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target)
 {
    addr = _addr;
    size = _size;
    assert(target);
-    isCacheFill = cacheFill;
+    isCacheFill = false;
    needsExclusive = target->needsExclusive();
    _isUncacheable = target->req->isUncacheable();
    inService = false;
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@ -136,7 +136,7 @@ public:
     * @param size The number of bytes to request.
     * @param pkt  The original miss.
     */
-    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
+    void allocate(Addr addr, int size, PacketPtr pkt);

    /**
     * Allocate this MSHR as a buffer for the given request.
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@ -36,8 +36,9 @@

 using namespace std;

-MSHRQueue::MSHRQueue(int num_entries, int reserve)
-    : numEntries(num_entries + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve, int _index)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve),
+      index(_index)
 {
    allocated = 0;
    inServiceEntries = 0;
@ -107,14 +108,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 }

 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
 {
    assert(!freeList.empty());
    MSHR *mshr = freeList.front();
    assert(mshr->getNumTargets() == 0);
    freeList.pop_front();

-    mshr->allocate(addr, size, pkt, isFill);
+    mshr->allocate(addr, size, pkt);
    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);

--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@ -74,6 +74,9 @@ class MSHRQueue
    int allocated;
    /** The number of entries that have been forwarded to the bus. */
    int inServiceEntries;
+    /** The index of this queue within the cache (MSHR queue vs. write
+     * buffer). */
+    const int index;

    /**
     * Create a queue with a given number of entries.
@ -81,7 +84,7 @@ class MSHRQueue
     * @param reserve The minimum number of entries needed to satisfy
     * any access.
     */
-    MSHRQueue(int num_entries, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve, int index);

    /** Destructor */
    ~MSHRQueue();
@ -118,7 +121,7 @@ class MSHRQueue
     *
     * @pre There are free entries.
     */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);

    /**
     * Removes the given MSHR from the queue. This places the MSHR on the
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
            keepTrying = cache->inCache(pkt->getAddr());
        }
        if (pf.empty()) {
-            cache->deassertMemSideBusRequest(Request_PF);
+            cache->deassertMemSideBusRequest(BaseCache::Request_PF);
            if (keepTrying) return NULL; //None left, all were in cache
        }
    } while (keepTrying);
@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
            pfRemovedMSHR++;
            pf.erase(iter);
            if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
        }

        //Remove anything in queue with delay older than time
@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                iter--;
            }
            if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
        }


@ -243,7 +243,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
            pf.push_back(prefetch);

            //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(Request_PF, prefetch->time);
+            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);

            //Increment through the list
            addr++;
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@ -64,10 +64,8 @@ MemCmd::commandInfo[] =
    /* WriteResp */
    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
    /* Writeback */
-    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
-            WritebackAck, "Writeback" },
-    /* WritebackAck */
-    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET4(IsWrite, NeedsExclusive, IsRequest, HasData),
+            InvalidCmd, "Writeback" },
    /* SoftPFReq */
    { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
            SoftPFResp, "SoftPFReq" },
@ -88,7 +86,11 @@ MemCmd::commandInfo[] =
    { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
            InvalidCmd, "WriteInvalidateResp" },
    /* UpgradeReq */
-    { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
+    { SET4(IsInvalidate, NeedsExclusive, IsRequest, NeedsResponse),
+            UpgradeResp, "UpgradeReq" },
+    /* UpgradeResp */
+    { SET3(IsInvalidate, NeedsExclusive, IsResponse),
+            InvalidCmd, "UpgradeResp" },
    /* ReadExReq */
    { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
            ReadExResp, "ReadExReq" },
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@ -67,7 +67,6 @@ class MemCmd
        WriteReq,
        WriteResp,
        Writeback,
-        WritebackAck,
        SoftPFReq,
        HardPFReq,
        SoftPFResp,
@ -75,6 +74,7 @@ class MemCmd
        WriteInvalidateReq,
        WriteInvalidateResp,
        UpgradeReq,
+        UpgradeResp,
        ReadExReq,
        ReadExResp,
        LoadLockedReq,
@ -100,7 +100,6 @@ class MemCmd
        NeedsResponse,  //!< Requester needs response from target
        IsSWPrefetch,
        IsHWPrefetch,
-        IsUpgrade,
        IsLocked,       //!< Alpha/MIPS LL or SC access
        HasData,        //!< There is an associated payload
        NUM_COMMAND_ATTRIBUTES