Fixes to get prefetching working again.

Apparently we broke it with the cache rewrite and never noticed. Thanks to Bao Yungang <baoyungang@gmail.com> for a significant part of these changes (and for inspiring me to work on the rest). Some other overdue cleanup on the prefetch code too.
2009-02-16 08:56:40 -08:00 · 2009-02-16 08:56:40 -08:00 · 89a7fb0393
commit 89a7fb0393
parent 6923282fb5
18 changed files with 384 additions and 349 deletions
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@ -344,8 +344,12 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU, Port *ic, Port *dc)
        assert(newTC->threadId() == oldTC->threadId());
        system->replaceThreadContext(newTC, newTC->contextId());

-        if (DTRACE(Context))
+        /* This code no longer works since the zero register (e.g.,
+         * r31 on Alpha) doesn't necessarily contain zero at this
+         * point.
+           if (DTRACE(Context))
            ThreadContext::compare(oldTC, newTC);
+        */
    }

 #if FULL_SYSTEM
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@ -52,12 +52,10 @@ class BaseCache(MemObject):
    two_queue = Param.Bool(False,
        "whether the lifo should have two queue replacement")
    write_buffers = Param.Int(8, "number of write buffers")
-    prefetch_miss = Param.Bool(False,
-         "wheter you are using the hardware prefetcher from Miss stream")
-    prefetch_access = Param.Bool(False,
-         "wheter you are using the hardware prefetcher from Access stream")
+    prefetch_on_access = Param.Bool(False,
+         "notify the hardware prefetcher on every access (not just misses)")
    prefetcher_size = Param.Int(100,
-         "Number of entries in the harware prefetch queue")
+         "Number of entries in the hardware prefetch queue")
    prefetch_past_page = Param.Bool(False,
         "Allow prefetches to cross virtual page boundaries")
    prefetch_serial_squash = Param.Bool(False,
@ -69,9 +67,9 @@ class BaseCache(MemObject):
    prefetch_policy = Param.Prefetch('none',
         "Type of prefetcher to use")
    prefetch_cache_check_push = Param.Bool(True,
-         "Check if in cash on push or pop of prefetch queue")
+         "Check if in cache on push or pop of prefetch queue")
    prefetch_use_cpu_id = Param.Bool(True,
-         "Use the CPU ID to seperate calculations of prefetches")
+         "Use the CPU ID to separate calculations of prefetches")
    prefetch_data_accesses_only = Param.Bool(False,
         "Only prefetch on data not on instruction accesses")
    cpu_side = Port("Port on side closer to CPU")
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@ -445,12 +445,6 @@ class BaseCache : public MemObject
        }
    }

-    Tick nextMSHRReadyTime()
-    {
-        return std::min(mshrQueue.nextMSHRReadyTime(),
-                        writeBuffer.nextMSHRReadyTime());
-    }
-
    /**
     * Request the master bus for the given cause and time.
     * @param cause The reason for the request.
@ -467,10 +461,11 @@ class BaseCache : public MemObject
     */
    void deassertMemSideBusRequest(RequestCause cause)
    {
-        // obsolete!!
-        assert(false);
-        // memSidePort->deassertBusRequest(cause);
-        // checkDrain();
+        // Obsolete... we no longer signal bus requests explicitly so
+        // we can't deassert them.  Leaving this in as a no-op since
+        // the prefetcher calls it to indicate that it no longer wants
+        // to request a prefetch, and someday that might be
+        // interesting again.
    }

    virtual unsigned int drain(Event *de);
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@ -205,7 +205,7 @@ class CacheBlk
     * be touched.
     * @return True if the block was a hardware prefetch, unaccessed.
     */
-    bool isPrefetch() const
+    bool wasPrefetched() const
    {
        return (status & BlkHWPrefetched) != 0;
    }
--- a/src/mem/cache/builder.cc
+++ b/src/mem/cache/builder.cc
@ -38,7 +38,6 @@
 // Must be included first to determine which caches we want
 #include "enums/Prefetch.hh"
 #include "mem/config/cache.hh"
-#include "mem/config/prefetch.hh"
 #include "mem/cache/base.hh"
 #include "mem/cache/cache.hh"
 #include "mem/bus.hh"
@ -58,38 +57,32 @@
 #endif

 //Prefetcher Headers
-#if defined(USE_GHB)
 #include "mem/cache/prefetch/ghb.hh"
-#endif
-#if defined(USE_TAGGED)
 #include "mem/cache/prefetch/tagged.hh"
-#endif
-#if defined(USE_STRIDED)
 #include "mem/cache/prefetch/stride.hh"
-#endif


 using namespace std;
 using namespace TheISA;

-#define BUILD_CACHE(TAGS, tags)                                      \
-    do {                                                                \
-        BasePrefetcher *pf;                                             \
-        if (prefetch_policy == Enums::tagged) {                         \
-            BUILD_TAGGED_PREFETCHER(TAGS);                              \
-        }                                                               \
-        else if (prefetch_policy == Enums::stride) {                    \
-            BUILD_STRIDED_PREFETCHER(TAGS);                             \
-        }                                                               \
-        else if (prefetch_policy == Enums::ghb) {                       \
-            BUILD_GHB_PREFETCHER(TAGS);                                 \
-        }                                                               \
-        else {                                                          \
-            BUILD_NULL_PREFETCHER(TAGS);                                \
-        }                                                               \
-        Cache<TAGS> *retval =                                           \
-            new Cache<TAGS>(this, tags, pf);                            \
-        return retval;                                                  \
+#define BUILD_CACHE(TAGS, tags)                         \
+    do {                                                \
+        BasePrefetcher *pf;                             \
+        if (prefetch_policy == Enums::tagged) {         \
+            pf = new TaggedPrefetcher(this);            \
+        }                                               \
+        else if (prefetch_policy == Enums::stride) {    \
+            pf = new StridePrefetcher(this);            \
+        }                                               \
+        else if (prefetch_policy == Enums::ghb) {       \
+            pf = new GHBPrefetcher(this);               \
+        }                                               \
+        else {                                          \
+            pf = NULL;                                  \
+        }                                               \
+        Cache<TAGS> *retval =                           \
+            new Cache<TAGS>(this, tags, pf);            \
+        return retval;                                  \
    } while (0)

 #define BUILD_CACHE_PANIC(x) do {                       \
@ -135,37 +128,6 @@ using namespace TheISA;
        }                                               \
    } while (0)

-#define BUILD_COHERENCE(b) do {                                         \
-    } while (0)
-
-#if defined(USE_TAGGED)
-#define BUILD_TAGGED_PREFETCHER(t)                                      \
-    pf = new TaggedPrefetcher(this)
-#else
-#define BUILD_TAGGED_PREFETCHER(t) BUILD_CACHE_PANIC("Tagged Prefetcher")
-#endif
-
-#if defined(USE_STRIDED)
-#define BUILD_STRIDED_PREFETCHER(t)                                     \
-    pf = new StridePrefetcher(this)
-#else
-#define BUILD_STRIDED_PREFETCHER(t) BUILD_CACHE_PANIC("Stride Prefetcher")
-#endif
-
-#if defined(USE_GHB)
-#define BUILD_GHB_PREFETCHER(t)                                         \
-    pf = new GHBPrefetcher(this)
-#else
-#define BUILD_GHB_PREFETCHER(t) BUILD_CACHE_PANIC("GHB Prefetcher")
-#endif
-
-#if defined(USE_TAGGED)
-#define BUILD_NULL_PREFETCHER(t)                                        \
-    pf = new TaggedPrefetcher(this)
-#else
-#define BUILD_NULL_PREFETCHER(t) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)")
-#endif
-
 BaseCache *
 BaseCacheParams::create()
 {
@ -174,24 +136,6 @@ BaseCacheParams::create()
        subblock_size = block_size;
    }

-    //Warnings about prefetcher policy
-    if (prefetch_policy == Enums::none) {
-        if (prefetch_miss || prefetch_access)
-            panic("With no prefetcher, you shouldn't prefetch from"
-                  " either miss or access stream\n");
-    }
-
-    if (prefetch_policy == Enums::tagged || prefetch_policy == Enums::stride ||
-        prefetch_policy == Enums::ghb) {
-
-        if (!prefetch_miss && !prefetch_access)
-            warn("With this prefetcher you should chose a prefetch"
-                 " stream (miss or access)\nNo Prefetching will occur\n");
-
-        if (prefetch_miss && prefetch_access)
-            panic("Can't do prefetches from both miss and access stream");
-    }
-
 #if defined(USE_CACHE_IIC)
    // Build IIC params
    IIC::Params iic_params;
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@ -64,8 +64,6 @@ class Cache : public BaseCache
    /** A typedef for a list of BlkType pointers. */
    typedef typename TagStore::BlkList BlkList;

-    bool prefetchAccess;
-
  protected:

    class CpuSidePort : public CachePort
@ -141,7 +139,10 @@ class Cache : public BaseCache
     */
    const bool doFastWrites;

-    const bool prefetchMiss;
+    /**
+     * Notify the prefetcher on every access, not just misses.
+     */
+    const bool prefetchOnAccess;

    /**
     * Does all the processing necessary to perform the provided request.
@ -320,6 +321,11 @@ class Cache : public BaseCache
    bool inMissQueue(Addr addr) {
        return (mshrQueue.findMatch(addr) != 0);
    }
+
+    /**
+     * Find next request ready time from among possible sources.
+     */
+    Tick nextMSHRReadyTime();
 };

 #endif // __CACHE_HH__
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@ -53,11 +53,10 @@
 template<class TagStore>
 Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
    : BaseCache(p),
-      prefetchAccess(p->prefetch_access),
      tags(tags),
      prefetcher(pf),
      doFastWrites(true),
-      prefetchMiss(p->prefetch_miss)
+      prefetchOnAccess(p->prefetch_on_access)
 {
    tempBlock = new BlkType();
    tempBlock->data = new uint8_t[blkSize];
@ -72,7 +71,8 @@ Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf)
    memSidePort->setOtherPort(cpuSidePort);

    tags->setCache(this);
-    prefetcher->setCache(this);
+    if (prefetcher)
+        prefetcher->setCache(this);
 }

 template<class TagStore>
@ -81,7 +81,8 @@ Cache<TagStore>::regStats()
 {
    BaseCache::regStats();
    tags->regStats(name());
-    prefetcher->regStats(name());
+    if (prefetcher)
+        prefetcher->regStats(name());
 }

 template<class TagStore>
@ -271,29 +272,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,

    blk = tags->accessBlock(pkt->getAddr(), lat);

-    if (prefetchAccess) {
-        //We are determining prefetches on access stream, call prefetcher
-        prefetcher->handleMiss(pkt, curTick);
-    }
-
    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
            (blk) ? "hit" : "miss");

    if (blk != NULL) {

-        if (blk->isPrefetch()) {
-            //Signal that this was a hit under prefetch (no need for
-            //use prefetch (only can get here if true)
-            DPRINTF(HWPrefetch, "Hit a block that was prefetched\n");
-            blk->status &= ~BlkHWPrefetched;
-            if (prefetchMiss) {
-                //If we are using the miss stream, signal the
-                //prefetcher otherwise the access stream would have
-                //already signaled this hit
-                prefetcher->handleMiss(pkt, curTick);
-            }
-        }
-
        if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
            // OK to satisfy access
            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
@ -448,6 +431,9 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
    }
 #endif

+    // track time of availability of next prefetch, if any
+    Tick next_pf_time = 0;
+
    bool needsResponse = pkt->needsResponse();

    if (satisfied) {
@ -457,10 +443,14 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
        } else {
            delete pkt;
        }
+
+        if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
+            if (blk)
+                blk->status &= ~BlkHWPrefetched;
+            next_pf_time = prefetcher->notify(pkt, time);
+        }
    } else {
        // miss
-        if (prefetchMiss)
-            prefetcher->handleMiss(pkt, time);

        Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
        MSHR *mshr = mshrQueue.findMatch(blk_addr);
@ -512,9 +502,16 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)

                allocateMissBuffer(pkt, time, true);
            }
+
+            if (prefetcher) {
+                next_pf_time = prefetcher->notify(pkt, time);
+            }
        }
    }

+    if (next_pf_time != 0)
+        requestMemSideBus(Request_PF, std::max(time, next_pf_time));
+
    // copy writebacks to write buffer
    while (!writebacks.empty()) {
        PacketPtr wbPkt = writebacks.front();
@ -663,6 +660,17 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
        }
    }

+    // Note that we don't invoke the prefetcher at all in atomic mode.
+    // It's not clear how to do it properly, particularly for
+    // prefetchers that aggressively generate prefetch candidates and
+    // rely on bandwidth contention to throttle them; these will tend
+    // to pollute the cache in atomic mode since there is no bandwidth
+    // contention.  If we ever do want to enable prefetching in atomic
+    // mode, though, this is the place to do it... see timingAccess()
+    // for an example (though we'd want to issue the prefetch(es)
+    // immediately rather than calling requestMemSideBus() as we do
+    // there).
+
    // Handle writebacks if needed
    while (!writebacks.empty()){
        PacketPtr wbPkt = writebacks.front();
@ -787,7 +795,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
    while (mshr->hasTargets()) {
        MSHR::Target *target = mshr->getTarget();

-        if (target->isCpuSide()) {
+        switch (target->source) {
+          case MSHR::Target::FromCPU:
            Tick completion_time;
            if (is_fill) {
                satisfyCpuSideRequest(target->pkt, blk);
@ -825,13 +834,27 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                target->pkt->cmd = MemCmd::ReadRespWithInvalidate;
            }
            cpuSidePort->respond(target->pkt, completion_time);
-        } else {
+            break;
+
+          case MSHR::Target::FromPrefetcher:
+            assert(target->pkt->cmd == MemCmd::HardPFReq);
+            if (blk)
+                blk->status |= BlkHWPrefetched;
+            delete target->pkt->req;
+            delete target->pkt;
+            break;
+
+          case MSHR::Target::FromSnoop:
            // I don't believe that a snoop can be in an error state
            assert(!is_error);
            // response to snoop request
            DPRINTF(Cache, "processing deferred snoop...\n");
            handleSnoop(target->pkt, blk, true, true,
                        mshr->pendingInvalidate || pkt->isInvalidate());
+            break;
+
+          default:
+            panic("Illegal target->source enum %d\n", target->source);
        }

        mshr->popTarget();
@ -1333,6 +1356,22 @@ Cache<TagStore>::getTimingPacket()
 }


+template<class TagStore>
+Tick
+Cache<TagStore>::nextMSHRReadyTime()
+{
+    // Earliest ready time among the miss queue and the write buffer.
+    const Tick queue_ready = std::min(mshrQueue.nextMSHRReadyTime(),
+                                      writeBuffer.nextMSHRReadyTime());
+
+    // With no prefetcher attached there is no further source to consult.
+    if (!prefetcher)
+        return queue_ready;
+
+    // Otherwise a queued prefetch may become ready even sooner.
+    return std::min(queue_ready, prefetcher->nextPrefetchReadyTime());
+}
+
+
 ///////////////
 //
 // CpuSidePort
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@ -64,9 +64,9 @@ MSHR::TargetList::TargetList()

 inline void
 MSHR::TargetList::add(PacketPtr pkt, Tick readyTime,
-                      Counter order, bool cpuSide, bool markPending)
+                      Counter order, Target::Source source, bool markPending)
 {
-    if (cpuSide) {
+    if (source != Target::FromSnoop) {
        if (pkt->needsExclusive()) {
            needsExclusive = true;
        }
@ -84,7 +84,7 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime,
        }
    }

-    push_back(Target(pkt, readyTime, order, cpuSide, markPending));
+    push_back(Target(pkt, readyTime, order, source, markPending));
 }


@ -141,7 +141,14 @@ print(std::ostream &os, int verbosity, const std::string &prefix) const
 {
    ConstIterator end_i = end();
    for (ConstIterator i = begin(); i != end_i; ++i) {
-        ccprintf(os, "%s%s: ", prefix, i->isCpuSide() ? "cpu" : "mem");
+        // Label each target with where it came from.  Every case must
+        // end in a break: without it control falls through to the
+        // default and the label is always the empty string.
+        const char *s;
+        switch (i->source) {
+          case Target::FromCPU: s = "FromCPU"; break;
+          case Target::FromSnoop: s = "FromSnoop"; break;
+          case Target::FromPrefetcher: s = "FromPrefetcher"; break;
+          default: s = ""; break;
+        }
+        ccprintf(os, "%s%s: ", prefix, s);
        i->pkt->print(os, verbosity, "");
    }
 }
@ -162,10 +169,12 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
    downstreamPending = false;
    threadNum = 0;
    ntargets = 1;
-    // Don't know of a case where we would allocate a new MSHR for a
-    // snoop (mem-side request), so set cpuSide to true here.
    assert(targets->isReset());
-    targets->add(target, whenReady, _order, true, true);
+    // Don't know of a case where we would allocate a new MSHR for a
+    // snoop (mem-side request), so set source according to request here
+    Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
+        Target::FromPrefetcher : Target::FromCPU;
+    targets->add(target, whenReady, _order, source, true);
    assert(deferredTargets->isReset());
    pendingInvalidate = false;
    pendingShared = false;
@ -230,17 +239,22 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
    //   comes back (but before this target is processed)
    // - the outstanding request is for a non-exclusive block and this
    //   target requires an exclusive block
+
+    // assume we'd never issue a prefetch when we've got an
+    // outstanding miss
+    assert(pkt->cmd != MemCmd::HardPFReq);
+
    if (inService &&
        (!deferredTargets->empty() || pendingInvalidate ||
         (!targets->needsExclusive && pkt->needsExclusive()))) {
        // need to put on deferred list
-        deferredTargets->add(pkt, whenReady, _order, true, true);
+        deferredTargets->add(pkt, whenReady, _order, Target::FromCPU, true);
    } else {
        // No request outstanding, or still OK to append to
        // outstanding request: append to regular target list.  Only
        // mark pending if current request hasn't been issued yet
        // (isn't in service).
-        targets->add(pkt, whenReady, _order, true, !inService);
+        targets->add(pkt, whenReady, _order, Target::FromCPU, !inService);
    }

    ++ntargets;
@ -291,7 +305,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
        // actual target device (typ. PhysicalMemory) will delete the
        // packet on reception, so we need to save a copy here
        PacketPtr cp_pkt = new Packet(pkt, true);
-        targets->add(cp_pkt, curTick, _order, false,
+        targets->add(cp_pkt, curTick, _order, Target::FromSnoop,
                     downstreamPending && targets->needsExclusive);
        ++ntargets;

--- a/src/mem/cache/mshr.hh
+++ b/src/mem/cache/mshr.hh
@ -55,20 +55,25 @@ class MSHR : public Packet::SenderState, public Printable

    class Target {
      public:
+
+        enum Source {
+            FromCPU,
+            FromSnoop,
+            FromPrefetcher
+        };
+
        Tick recvTime;  //!< Time when request was received (for stats)
        Tick readyTime; //!< Time when request is ready to be serviced
        Counter order;  //!< Global order (for memory consistency mgmt)
        PacketPtr pkt;  //!< Pending request packet.
-        bool cpuSide;   //!< Did request come from cpu side or mem side?
+        Source source;  //!< Did request come from cpu, memory, or prefetcher?
        bool markedPending; //!< Did we mark upstream MSHR
                            //!<  as downstreamPending?

-        bool isCpuSide() const { return cpuSide; }
-
        Target(PacketPtr _pkt, Tick _readyTime, Counter _order,
-               bool _cpuSide, bool _markedPending)
+               Source _source, bool _markedPending)
            : recvTime(curTick), readyTime(_readyTime), order(_order),
-              pkt(_pkt), cpuSide(_cpuSide), markedPending(_markedPending)
+              pkt(_pkt), source(_source), markedPending(_markedPending)
        {}
    };

@ -85,7 +90,7 @@ class MSHR : public Packet::SenderState, public Printable
        void resetFlags() { needsExclusive = hasUpgrade = false; }
        bool isReset()    { return !needsExclusive && !hasUpgrade; }
        void add(PacketPtr pkt, Tick readyTime, Counter order,
-                 bool cpuSide, bool markPending);
+                 Target::Source source, bool markPending);
        void replaceUpgrades();
        void clearDownstreamPending();
        bool checkFunctional(PacketPtr pkt);
@ -238,7 +243,7 @@ public:
        if (getNumTargets() != 1)
            return false;
        Target *tgt = getTarget();
-        return tgt->isCpuSide() && !tgt->pkt->needsResponse();
+        return tgt->source == Target::FromCPU && !tgt->pkt->needsResponse();
    }

    bool promoteDeferredTargets();
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@ -33,6 +33,7 @@
 * Hardware Prefetcher Definition.
 */

+#include "arch/isa_traits.hh"
 #include "base/trace.hh"
 #include "mem/cache/base.hh"
 #include "mem/cache/prefetch/base.hh"
@ -43,7 +44,7 @@ BasePrefetcher::BasePrefetcher(const BaseCacheParams *p)
    : size(p->prefetcher_size), pageStop(!p->prefetch_past_page),
      serialSquash(p->prefetch_serial_squash),
      cacheCheckPush(p->prefetch_cache_check_push),
-      only_data(p->prefetch_data_accesses_only)
+      onlyData(p->prefetch_data_accesses_only)
 {
 }

@ -52,6 +53,7 @@ BasePrefetcher::setCache(BaseCache *_cache)
 {
    cache = _cache;
    blkSize = cache->getBlockSize();
+    _name = cache->name() + "-pf";
 }

 void
@ -99,7 +101,8 @@ BasePrefetcher::regStats(const std::string &name)

    pfSquashed
        .name(name + ".prefetcher.num_hwpf_squashed_from_miss")
-        .desc("number of hwpf that got squashed due to a miss aborting calculation time")
+        .desc("number of hwpf that got squashed due to a miss "
+              "aborting calculation time")
        ;
 }

@ -126,60 +129,79 @@ BasePrefetcher::inMissQueue(Addr addr)
 PacketPtr
 BasePrefetcher::getPacket()
 {
-    DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
+    DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n");

    if (pf.empty()) {
-        DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name());
+        DPRINTF(HWPrefetch, "No HW_PF found\n");
        return NULL;
    }

    PacketPtr pkt;
-    bool keepTrying = false;
+    bool keep_trying = false;
    do {
        pkt = *pf.begin();
        pf.pop_front();
        if (!cacheCheckPush) {
-            keepTrying = cache->inCache(pkt->getAddr());
+            keep_trying = cache->inCache(pkt->getAddr());
        }
+
+        if (keep_trying) {
+            DPRINTF(HWPrefetch, "addr 0x%x in cache, skipping\n",
+                    pkt->getAddr());
+            delete pkt->req;
+            delete pkt;
+        }
+
        if (pf.empty()) {
            cache->deassertMemSideBusRequest(BaseCache::Request_PF);
-            if (keepTrying) return NULL; //None left, all were in cache
+            if (keep_trying) {
+                return NULL; // None left, all were in cache
+            }
        }
-    } while (keepTrying);
+    } while (keep_trying);

    pfIssued++;
+    assert(pkt != NULL);
+    DPRINTF(HWPrefetch, "returning 0x%x\n", pkt->getAddr());
    return pkt;
 }

-void
-BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
-{
-    if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data))
-    {
-        //Calculate the blk address
-        Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);

-        //Check if miss is in pfq, if so remove it
-        std::list<PacketPtr>::iterator iter = inPrefetch(blkAddr);
+Tick
+BasePrefetcher::notify(PacketPtr &pkt, Tick time)
+{
+    if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && onlyData)) {
+        // Calculate the blk address
+        Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+
+        // Check if miss is in pfq, if so remove it
+        std::list<PacketPtr>::iterator iter = inPrefetch(blk_addr);
        if (iter != pf.end()) {
-            DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
+            DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: "
+                    "0x%x, removing it\n", blk_addr);
            pfRemovedMSHR++;
+            delete (*iter)->req;
+            delete (*iter);
            pf.erase(iter);
            if (pf.empty())
                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
        }

-        //Remove anything in queue with delay older than time
-        //since everything is inserted in time order, start from end
-        //and work until pf.empty() or time is earlier
-        //This is done to emulate Aborting the previous work on a new miss
-        //Needed for serial calculators like GHB
+        // Remove anything in queue with delay older than time
+        // since everything is inserted in time order, start from end
+        // and work until pf.empty() or time is earlier
+        // This is done to emulate Aborting the previous work on a new miss
+        // Needed for serial calculators like GHB
        if (serialSquash) {
-            iter = pf.end();
-            iter--;
-            while (!pf.empty() && ((*iter)->time >= time)) {
+            // Squash from the back of the queue.  Re-reading pf.back()
+            // on every iteration avoids decrementing end() of an empty
+            // list and avoids using an iterator after its element has
+            // been erased.
+            while (!pf.empty() && pf.back()->time >= time) {
                pfSquashed++;
-                pf.pop_back();
+                DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n",
+                        pf.back()->getAddr());
+                // The queue owns both the packet and its request.
+                delete pf.back()->req;
+                delete pf.back();
+                pf.pop_back();
-                iter--;
            }
            if (pf.empty())
@ -191,74 +213,70 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
        std::list<Tick> delays;
        calculatePrefetch(pkt, addresses, delays);

-        std::list<Addr>::iterator addr = addresses.begin();
-        std::list<Tick>::iterator delay = delays.begin();
-        while (addr != addresses.end())
-        {
-            DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name());
-            //temp calc this here...
+        std::list<Addr>::iterator addrIter = addresses.begin();
+        std::list<Tick>::iterator delayIter = delays.begin();
+        for (; addrIter != addresses.end(); ++addrIter, ++delayIter) {
+            Addr addr = *addrIter;
+
            pfIdentified++;
-            //create a prefetch memreq
-            Request * prefetchReq = new Request(*addr, blkSize, 0);
-            PacketPtr prefetch;
-            prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1);
+
+            DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, "
+                    "inserting into prefetch queue with delay %d time %d\n",
+                    addr, *delayIter, time);
+
+            // Check if it is already in the cache
+            if (cacheCheckPush && cache->inCache(addr)) {
+                DPRINTF(HWPrefetch, "Prefetch addr already in cache\n");
+                continue;
+            }
+
+            // Check if it is already in the miss_queue
+            if (cache->inMissQueue(addr)) {
+                DPRINTF(HWPrefetch, "Prefetch addr already in miss queue\n");
+                continue;
+            }
+
+            // Check if it is already in the pf buffer
+            if (inPrefetch(addr) != pf.end()) {
+                pfBufferHit++;
+                DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n");
+                continue;
+            }
+
+            // create a prefetch memreq
+            Request *prefetchReq = new Request(*addrIter, blkSize, 0);
+            PacketPtr prefetch =
+                new Packet(prefetchReq, MemCmd::HardPFReq, Packet::Broadcast);
            prefetch->allocate();
            prefetch->req->setThreadContext(pkt->req->contextId(),
                                            pkt->req->threadId());

-            prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
-            //... initialize
+            prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE

-            //Check if it is already in the cache
-            if (cacheCheckPush) {
-                if (cache->inCache(prefetch->getAddr())) {
-                    addr++;
-                    delay++;
-                    continue;
-                }
-            }
-
-            //Check if it is already in the miss_queue
-            if (cache->inMissQueue(prefetch->getAddr())) {
-                addr++;
-                delay++;
-                continue;
-            }
-
-            //Check if it is already in the pf buffer
-            if (inPrefetch(prefetch->getAddr()) != pf.end()) {
-                pfBufferHit++;
-                addr++;
-                delay++;
-                continue;
-            }
-
-            //We just remove the head if we are full
-            if (pf.size() == size)
-            {
-                DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name());
+            // We just remove the head if we are full
+            if (pf.size() == size) {
                pfRemovedFull++;
+                PacketPtr old_pkt = *pf.begin();
+                DPRINTF(HWPrefetch, "Prefetch queue full, "
+                        "removing oldest 0x%x\n", old_pkt->getAddr());
+                delete old_pkt->req;
+                delete old_pkt;
                pf.pop_front();
            }

            pf.push_back(prefetch);
-
-            //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
-
-            //Increment through the list
-            addr++;
-            delay++;
        }
    }
+
+    return pf.empty() ? 0 : pf.front()->time;
 }

 std::list<PacketPtr>::iterator
 BasePrefetcher::inPrefetch(Addr address)
 {
-    //Guaranteed to only be one match, we always check before inserting
+    // Guaranteed to only be one match, we always check before inserting
    std::list<PacketPtr>::iterator iter;
-    for (iter=pf.begin(); iter != pf.end(); iter++) {
+    for (iter = pf.begin(); iter != pf.end(); iter++) {
        if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) {
            return iter;
        }
@ -266,4 +284,8 @@ BasePrefetcher::inPrefetch(Addr address)
    return pf.end();
 }

-
+bool
+BasePrefetcher::samePage(Addr a, Addr b)
+{
+    // Two addresses share a VM page iff they round down to the same
+    // page-aligned base.
+    const Addr page_a = roundDown(a, TheISA::VMPageSize);
+    const Addr page_b = roundDown(b, TheISA::VMPageSize);
+    return page_a == page_b;
+}
--- a/src/mem/cache/prefetch/base.hh
+++ b/src/mem/cache/prefetch/base.hh
@ -73,7 +73,9 @@ class BasePrefetcher
    bool cacheCheckPush;

    /** Do we prefetch on only data reads, or on inst reads as well. */
-    bool only_data;
+    bool onlyData;
+
+    std::string _name;

  public:

@ -90,13 +92,21 @@ class BasePrefetcher
    void regStats(const std::string &name);

  public:
+
    BasePrefetcher(const BaseCacheParams *p);

    virtual ~BasePrefetcher() {}

+    const std::string name() const { return _name; }
+
    void setCache(BaseCache *_cache);

-    void handleMiss(PacketPtr &pkt, Tick time);
+    /**
+     * Notify prefetcher of cache access (may be any access or just
+     * misses, depending on cache parameters.)
+     * @retval Time of next prefetch availability, or 0 if none.
+     */
+    Tick notify(PacketPtr &pkt, Tick time);

    bool inCache(Addr addr);

@ -109,11 +119,27 @@ class BasePrefetcher
        return !pf.empty();
    }

+    /**
+     * Time stamp of the packet at the head of the prefetch queue.
+     * @retval pf.front()->time, or MaxTick if the queue is empty.
+     */
+    Tick nextPrefetchReadyTime()
+    {
+        if (pf.empty())
+            return MaxTick;
+        return pf.front()->time;
+    }
+
    virtual void calculatePrefetch(PacketPtr &pkt,
                                   std::list<Addr> &addresses,
                                   std::list<Tick> &delays) = 0;

    std::list<PacketPtr>::iterator inPrefetch(Addr address);
+
+    /**
+     * Utility function: are addresses a and b on the same VM page?
+     */
+    bool samePage(Addr a, Addr b);
 };


--- a/src/mem/cache/prefetch/ghb.cc
+++ b/src/mem/cache/prefetch/ghb.cc
@ -41,32 +41,25 @@ void
 GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                                 std::list<Tick> &delays)
 {
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
-    int contextId = pkt->req->contextId();
-    if (!useContextId) contextId = 0;
+    Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    int ctx_id = useContextId ? pkt->req->contextId() : 0;
+    assert(ctx_id < Max_Contexts);

+    int new_stride = blk_addr - lastMissAddr[ctx_id];
+    int old_stride = lastMissAddr[ctx_id] - secondLastMissAddr[ctx_id];

-    int new_stride = blkAddr - last_miss_addr[contextId];
-    int old_stride = last_miss_addr[contextId] -
-        second_last_miss_addr[contextId];
-
-    second_last_miss_addr[contextId] = last_miss_addr[contextId];
-    last_miss_addr[contextId] = blkAddr;
+    secondLastMissAddr[ctx_id] = lastMissAddr[ctx_id];
+    lastMissAddr[ctx_id] = blk_addr;

    if (new_stride == old_stride) {
-        for (int d=1; d <= degree; d++) {
-            Addr newAddr = blkAddr + d * new_stride;
-            if (this->pageStop &&
-                (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-                (newAddr & ~(TheISA::VMPageSize - 1)))
-            {
-                //Spanned the page, so now stop
-                this->pfSpanPage += degree - d + 1;
+        for (int d = 1; d <= degree; d++) {
+            Addr new_addr = blk_addr + d * new_stride;
+            if (pageStop && !samePage(blk_addr, new_addr)) {
+                // Spanned the page, so now stop
+                pfSpanPage += degree - d + 1;
                return;
-            }
-            else
-            {
-                addresses.push_back(newAddr);
+            } else {
+                addresses.push_back(new_addr);
                delays.push_back(latency);
            }
        }
--- a/src/mem/cache/prefetch/ghb.hh
+++ b/src/mem/cache/prefetch/ghb.hh
@ -42,8 +42,10 @@ class GHBPrefetcher : public BasePrefetcher
 {
  protected:

-    Addr second_last_miss_addr[64/*MAX_CPUS*/];
-    Addr last_miss_addr[64/*MAX_CPUS*/];
+    static const int Max_Contexts = 64;
+
+    Addr secondLastMissAddr[Max_Contexts];
+    Addr lastMissAddr[Max_Contexts];

    Tick latency;
    int degree;
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@ -34,59 +34,108 @@
 * Stride Prefetcher template instantiations.
 */

+#include "base/trace.hh"
 #include "mem/cache/prefetch/stride.hh"

+/**
+ * PC-indexed stride detection.  One table of StrideEntry records is kept
+ * per context id (a single table when useContextId is false).  On a table
+ * hit the stride from the entry's previous block address is compared with
+ * the recorded stride, the confidence is updated, and while confidence is
+ * positive up to 'degree' prefetch addresses are generated.  On a table
+ * miss a new entry is allocated, evicting the lowest-confidence entry once
+ * the table holds 256 entries.  Requests without a PC are ignored.
+ *
+ * @param pkt The access to train on.
+ * @param addresses Output: prefetch addresses are appended here.
+ * @param delays Output: one 'latency' value appended per address.
+ */
 void
 StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                                    std::list<Tick> &delays)
 {
-//      Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
-    int contextId = pkt->req->contextId();
-    if (!useContextId) contextId = 0;
+    if (!pkt->req->hasPC()) {
+        DPRINTF(HWPrefetch, "ignoring request with no PC\n");
+        return;
+    }

-    /* Scan Table for IAddr Match */
-/*      std::list<strideEntry*>::iterator iter;
-  for (iter=table[contextId].begin();
-  iter !=table[contextId].end();
-  iter++) {
-  if ((*iter)->IAddr == pkt->pc) break;
-  }
+    Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+    int ctx_id = useContextId ? pkt->req->contextId() : 0;
+    Addr pc = pkt->req->getPC();
+    assert(ctx_id < Max_Contexts);
+    std::list<StrideEntry*> &tab = table[ctx_id];

-  if (iter != table[contextId].end()) {
-  //Hit in table
+    /* Scan Table for instAddr Match */
+    std::list<StrideEntry*>::iterator iter;
+    for (iter = tab.begin(); iter != tab.end(); iter++) {
+        if ((*iter)->instAddr == pc)
+            break;
+    }

-  int newStride = blkAddr - (*iter)->MAddr;
-  if (newStride == (*iter)->stride) {
-  (*iter)->confidence++;
-  }
-  else {
-  (*iter)->stride = newStride;
-  (*iter)->confidence--;
-  }
+    if (iter != tab.end()) {
+        // Hit in table

-  (*iter)->MAddr = blkAddr;
+        int new_stride = blk_addr - (*iter)->missAddr;
+        bool stride_match = (new_stride == (*iter)->stride);

-  for (int d=1; d <= degree; d++) {
-  Addr newAddr = blkAddr + d * newStride;
-  if (this->pageStop &&
-  (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-  (newAddr & ~(TheISA::VMPageSize - 1)))
-  {
-  //Spanned the page, so now stop
-  this->pfSpanPage += degree - d + 1;
-  return;
-  }
-  else
-  {
-  addresses.push_back(newAddr);
-  delays.push_back(latency);
-  }
-  }
-  }
-  else {
-  //Miss in table
-  //Find lowest confidence and replace
+        if (stride_match && new_stride != 0) {
+            if ((*iter)->confidence < Max_Conf)
+                (*iter)->confidence++;
+        } else {
+            (*iter)->stride = new_stride;
+            if ((*iter)->confidence > Min_Conf)
+                (*iter)->confidence = 0;
+        }

-  }
-*/
+        DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x stride %d (%s), conf %d\n",
+                pc, blk_addr, new_stride, stride_match ? "match" : "change",
+                (*iter)->confidence);
+
+        (*iter)->missAddr = blk_addr;
+
+        if ((*iter)->confidence <= 0)
+            return;
+
+        for (int d = 1; d <= degree; d++) {
+            Addr new_addr = blk_addr + d * new_stride;
+            if (pageStop && !samePage(blk_addr, new_addr)) {
+                // Spanned the page, so now stop
+                pfSpanPage += degree - d + 1;
+                return;
+            } else {
+                DPRINTF(HWPrefetch, "  queuing prefetch to %x @ %d\n",
+                        new_addr, latency);
+                addresses.push_back(new_addr);
+                delays.push_back(latency);
+            }
+        }
+    } else {
+        // Miss in table
+        // Find lowest confidence and replace
+
+        DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x\n", pc, blk_addr);
+
+        if (tab.size() >= 256) { //set default table size is 256
+            std::list<StrideEntry*>::iterator min_pos = tab.begin();
+            int min_conf = (*min_pos)->confidence;
+            for (iter = min_pos, ++iter; iter != tab.end(); ++iter) {
+                if ((*iter)->confidence < min_conf){
+                    min_pos = iter;
+                    min_conf = (*iter)->confidence;
+                }
+            }
+            DPRINTF(HWPrefetch, "  replacing PC %x\n", (*min_pos)->instAddr);
+            // The list stores raw pointers, so erase() alone would leak
+            // the evicted entry; free it before unlinking.
+            delete *min_pos;
+            tab.erase(min_pos);
+        }
+
+        StrideEntry *new_entry = new StrideEntry;
+        new_entry->instAddr = pc;
+        new_entry->missAddr = blk_addr;
+        new_entry->stride = 0;
+        new_entry->confidence = 0;
+        tab.push_back(new_entry);
+    }
 }
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@ -36,36 +36,36 @@
 #ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
 #define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__

+#include <limits.h>
 #include "mem/cache/prefetch/base.hh"

 class StridePrefetcher : public BasePrefetcher
 {
  protected:

-    class strideEntry
+    static const int Max_Contexts = 64;
+
+    // These constants need to be changed with the type of the
+    // 'confidence' field below.
+    static const int Max_Conf = INT_MAX;
+    static const int Min_Conf = INT_MIN;
+
+    class StrideEntry
    {
      public:
-        Addr IAddr;
-        Addr MAddr;
+        Addr instAddr;
+        Addr missAddr;
        int stride;
-        int64_t confidence;
-
-/*      bool operator < (strideEntry a,strideEntry b)
-        {
-            if (a.confidence == b.confidence) {
-                return true; //??????
-            }
-            else return a.confidence < b.confidence;
-            }*/
+        int confidence;
    };
-    Addr* lastMissAddr[64/*MAX_CPUS*/];

-    std::list<strideEntry*> table[64/*MAX_CPUS*/];
+    Addr *lastMissAddr[Max_Contexts];
+
+    std::list<StrideEntry*> table[Max_Contexts];
    Tick latency;
    int degree;
    bool useContextId;

-
  public:

    StridePrefetcher(const BaseCacheParams *p)
--- a/src/mem/cache/prefetch/tagged.cc
+++ b/src/mem/cache/prefetch/tagged.cc
@ -47,20 +47,15 @@ TaggedPrefetcher::
 calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
                  std::list<Tick> &delays)
 {
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
+    Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);

-    for (int d=1; d <= degree; d++) {
-        Addr newAddr = blkAddr + d*(this->blkSize);
-        if (this->pageStop &&
-            (blkAddr & ~(TheISA::VMPageSize - 1)) !=
-            (newAddr & ~(TheISA::VMPageSize - 1)))
-        {
-            //Spanned the page, so now stop
-            this->pfSpanPage += degree - d + 1;
+    for (int d = 1; d <= degree; d++) {
+        Addr newAddr = blkAddr + d*(blkSize);
+        if (pageStop &&  !samePage(blkAddr, newAddr)) {
+            // Spanned the page, so now stop
+            pfSpanPage += degree - d + 1;
            return;
-        }
-        else
-        {
+        } else {
            addresses.push_back(newAddr);
            delays.push_back(latency);
        }
--- a/src/mem/config/prefetch.hh
+++ b/src/mem/config/prefetch.hh
@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Ron Dreslinski
- */
-
-/**
- * @file
- * Central location to configure which prefetch types we want to build
- * into the simulator.  In the future, this should probably be
- * autogenerated by some sort of configuration script.
- */
-#define USE_TAGGED 1 //Be sure not to turn this off, it is also used for no
-                     //prefetching case unless you always want to use a
-                     //different prefetcher
-//#define USE_STRIDED 1
-//#define USE_GHB 1
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@ -424,6 +424,13 @@ class Request : public FastAlloc
    }

+    /** Returns true if the VALID_PC flag is set on this request. */
+    bool
+    hasPC() const
+    {
+        return flags.isSet(VALID_PC);
+    }
+
    /** Accessor function for pc.*/
    Addr
    getPC() const
    {