cpu: Tidy up the MemTest and make false sharing more obvious

The MemTest class really only tests false sharing, and as such there was a lot of old cruft that could be removed. This patch cleans up the tester and also makes its assumptions clearer. As part of this simplification, the reference functional memory is also removed. The regression configs using MemTest are updated to reflect the changes, and the stats will be bumped in a separate patch. The example config will be updated in a separate patch due to more extensive rework. In a follow-on patch, a new tester will be introduced that uses the MemChecker to implement true sharing.
2015-02-11 10:23:28 -05:00 · 2015-02-11 10:23:28 -05:00 · 6563ec8634
commit 6563ec8634
parent 550c318490
7 changed files with 286 additions and 352 deletions
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@ -124,7 +124,7 @@ else:

 # build a list of prototypes, one for each level of treespec, starting
 # at the end (last entry is tester objects)
-prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
+prototypes = [ MemTest(max_loads=options.maxloads,
                       percent_functional=options.functional,
                       percent_uncacheable=options.uncacheable,
                       progress_interval=options.progress) ]
@ -146,12 +146,9 @@ for scale in treespec[:-2]:
     prototypes.insert(0, next)

 # system simulated
-system = System(funcmem = SimpleMemory(in_addr_map = False),
-                funcbus = NoncoherentXBar(),
-                physmem = SimpleMemory(latency = "100ns"),
+system = System(physmem = SimpleMemory(latency = "100ns"),
                cache_line_size = block_size)

-
 system.voltage_domain = VoltageDomain(voltage = '1V')

 system.clk_domain = SrcClockDomain(clock =  options.sys_clock,
@ -182,14 +179,10 @@ def make_level(spec, prototypes, attach_obj, attach_port):
          # we just built the MemTest objects
          parent.cpu = objs
          for t in objs:
-               t.test = getattr(attach_obj, attach_port)
-               t.functional = system.funcbus.slave
+               t.port = getattr(attach_obj, attach_port)

 make_level(treespec, prototypes, system.physmem, "port")

-# connect reference memory to funcbus
-system.funcbus.master = system.funcmem.port
-
 # -----------------------
 # run simulation
 # -----------------------
@ -202,7 +195,7 @@ else:

 # The system port is never used in the tester so merely connect it
 # to avoid problems
-root.system.system_port = root.system.funcbus.slave
+root.system.system_port = root.system.physmem.cpu_side_bus.slave

 # Not much point in this being higher than the L1 latency
 m5.ticks.setGlobalFrequency('1ns')
--- a/src/cpu/testers/memtest/MemTest.py
+++ b/src/cpu/testers/memtest/MemTest.py
@ -1,3 +1,15 @@
+# Copyright (c) 2015 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2005-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@ -25,6 +37,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # Authors: Nathan Binkert
+#          Andreas Hansson

 from MemObject import MemObject
 from m5.params import *
@ -33,26 +46,30 @@ from m5.proxy import *
 class MemTest(MemObject):
    type = 'MemTest'
    cxx_header = "cpu/testers/memtest/memtest.hh"
-    max_loads = Param.Counter(0, "number of loads to execute")
-    atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n")
-    memory_size = Param.Int(65536, "memory size")
-    percent_dest_unaligned = Param.Percent(50,
-        "percent of copy dest address that are unaligned")
-    percent_reads = Param.Percent(65, "target read percentage")
-    issue_dmas = Param.Bool(False, "this memtester should issue dma requests")
-    percent_source_unaligned = Param.Percent(50,
-        "percent of copy source address that are unaligned")
-    percent_functional = Param.Percent(50, "percent of access that are functional")
-    percent_uncacheable = Param.Percent(10,
-        "target uncacheable percentage")
+
+    # Interval of packet injection, the size of the memory range
+    # touched, and an optional stop condition
+    interval = Param.Cycles(1, "Interval between request packets")
+    size = Param.Unsigned(65536, "Size of memory region to use (bytes)")
+    max_loads = Param.Counter(0, "Number of loads to execute before exiting")
+
+    # Control the mix of packets and if functional accesses are part of
+    # the mix or not
+    percent_reads = Param.Percent(65, "Percentage reads")
+    percent_functional = Param.Percent(50, "Percentage functional accesses")
+    percent_uncacheable = Param.Percent(10, "Percentage uncacheable")
+
+    # Determine how often to print progress messages and what timeout
+    # to use for checking progress of both requests and responses
    progress_interval = Param.Counter(1000000,
-        "progress report interval (in accesses)")
-    trace_addr = Param.Addr(0, "address to trace")
+        "Progress report interval (in accesses)")
+    progress_check = Param.Cycles(5000000, "Cycles before exiting " \
+                                      "due to lack of progress")

-    test = MasterPort("Port to the memory system to test")
-    functional = MasterPort("Port to the functional memory " \
-                                "used for verification")
-    suppress_func_warnings = Param.Bool(False,
-        "suppress warnings when functional accesses fail.\n")
-    sys = Param.System(Parent.any, "System Parameter")
+    port = MasterPort("Port to the memory system")
+    system = Param.System(Parent.any, "System this tester is part of")

+    # Add the ability to suppress error responses on functional
+    # accesses as Ruby needs this
+    suppress_func_warnings = Param.Bool(False, "Suppress warnings when "\
+                                            "functional accesses fail.")
--- a/src/cpu/testers/memtest/memtest.cc
+++ b/src/cpu/testers/memtest/memtest.cc
@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
@ -27,173 +39,129 @@
 *
 * Authors: Erik Hallnor
 *          Steve Reinhardt
+ *          Andreas Hansson
 */

-// FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded
-
-#include <iomanip>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "base/misc.hh"
 #include "base/random.hh"
 #include "base/statistics.hh"
 #include "cpu/testers/memtest/memtest.hh"
 #include "debug/MemTest.hh"
 #include "mem/mem_object.hh"
-#include "mem/packet.hh"
-#include "mem/port.hh"
-#include "mem/request.hh"
-#include "sim/sim_events.hh"
+#include "sim/sim_exit.hh"
 #include "sim/stats.hh"
 #include "sim/system.hh"

 using namespace std;

-int TESTER_ALLOCATOR=0;
+unsigned int TESTER_ALLOCATOR = 0;

 bool
 MemTest::CpuPort::recvTimingResp(PacketPtr pkt)
 {
-    memtest->completeRequest(pkt);
+    memtest.completeRequest(pkt);
    return true;
 }

 void
 MemTest::CpuPort::recvRetry()
 {
-    memtest->doRetry();
+    memtest.recvRetry();
 }

-void
+bool
 MemTest::sendPkt(PacketPtr pkt) {
    if (atomic) {
-        cachePort.sendAtomic(pkt);
+        port.sendAtomic(pkt);
        completeRequest(pkt);
-    }
-    else if (!cachePort.sendTimingReq(pkt)) {
-        DPRINTF(MemTest, "accessRetry setting to true\n");
-
-        //
-        // dma requests should never be retried
-        //
-        if (issueDmas) {
-            panic("Nacked DMA requests are not supported\n");
-        }
-        accessRetry = true;
-        retryPkt = pkt;
    } else {
-        if (issueDmas) {
-            dmaOutstanding = true;
+        if (!port.sendTimingReq(pkt)) {
+            retryPkt = pkt;
+            return false;
        }
    }
-
+    return true;
 }

 MemTest::MemTest(const Params *p)
    : MemObject(p),
      tickEvent(this),
-      cachePort("test", this),
-      funcPort("functional", this),
-      funcProxy(funcPort, p->sys->cacheLineSize()),
-      retryPkt(NULL),
-//      mainMem(main_mem),
-//      checkMem(check_mem),
-      size(p->memory_size),
+      noRequestEvent(this),
+      noResponseEvent(this),
+      port("port", *this),
+      retryPkt(nullptr),
+      size(p->size),
+      interval(p->interval),
      percentReads(p->percent_reads),
      percentFunctional(p->percent_functional),
      percentUncacheable(p->percent_uncacheable),
-      issueDmas(p->issue_dmas),
-      masterId(p->sys->getMasterId(name())),
-      blockSize(p->sys->cacheLineSize()),
+      masterId(p->system->getMasterId(name())),
+      blockSize(p->system->cacheLineSize()),
+      blockAddrMask(blockSize - 1),
      progressInterval(p->progress_interval),
+      progressCheck(p->progress_check),
      nextProgressMessage(p->progress_interval),
-      percentSourceUnaligned(p->percent_source_unaligned),
-      percentDestUnaligned(p->percent_dest_unaligned),
      maxLoads(p->max_loads),
-      atomic(p->atomic),
-      suppress_func_warnings(p->suppress_func_warnings)
+      atomic(p->system->isAtomicMode()),
+      suppressFuncWarnings(p->suppress_func_warnings)
 {
    id = TESTER_ALLOCATOR++;
+    fatal_if(id >= blockSize, "Too many testers, only %d allowed\n",
+             blockSize - 1);

-    // Needs to be masked off once we know the block size.
-    traceBlockAddr = p->trace_addr;
    baseAddr1 = 0x100000;
    baseAddr2 = 0x400000;
    uncacheAddr = 0x800000;

-    blockAddrMask = blockSize - 1;
-    traceBlockAddr = blockAddr(traceBlockAddr);
-
    // set up counters
-    noResponseCycles = 0;
    numReads = 0;
    numWrites = 0;
-    schedule(tickEvent, 0);

-    accessRetry = false;
-    dmaOutstanding = false;
+    // kick things into action
+    schedule(tickEvent, curTick());
+    schedule(noRequestEvent, clockEdge(progressCheck));
+    schedule(noResponseEvent, clockEdge(progressCheck));
 }

 BaseMasterPort &
 MemTest::getMasterPort(const std::string &if_name, PortID idx)
 {
-    if (if_name == "functional")
-        return funcPort;
-    else if (if_name == "test")
-        return cachePort;
+    if (if_name == "port")
+        return port;
    else
        return MemObject::getMasterPort(if_name, idx);
 }

 void
-MemTest::init()
-{
-    // initial memory contents for both physical memory and functional
-    // memory should be 0; no need to initialize them.
-}
-
-
-void
-MemTest::completeRequest(PacketPtr pkt)
+MemTest::completeRequest(PacketPtr pkt, bool functional)
 {
    Request *req = pkt->req;
+    assert(req->getSize() == 1);

-    if (issueDmas) {
-        dmaOutstanding = false;
-    }
+    // this address is no longer outstanding
+    auto remove_addr = outstandingAddrs.find(req->getPaddr());
+    assert(remove_addr != outstandingAddrs.end());
+    outstandingAddrs.erase(remove_addr);

-    DPRINTF(MemTest, "completing %s at address %x (blk %x) %s\n",
+    DPRINTF(MemTest, "Completing %s at address %x (blk %x) %s\n",
            pkt->isWrite() ? "write" : "read",
-            req->getPaddr(), blockAddr(req->getPaddr()),
+            req->getPaddr(), blockAlign(req->getPaddr()),
            pkt->isError() ? "error" : "success");

-    MemTestSenderState *state =
-        safe_cast<MemTestSenderState *>(pkt->senderState);
-
-    uint8_t *data = state->data;
-    // @todo: This should really be a const pointer
-    uint8_t *pkt_data = pkt->getPtr<uint8_t>();
-
-    //Remove the address from the list of outstanding
-    std::set<unsigned>::iterator removeAddr =
-        outstandingAddrs.find(req->getPaddr());
-    assert(removeAddr != outstandingAddrs.end());
-    outstandingAddrs.erase(removeAddr);
+    const uint8_t *pkt_data = pkt->getConstPtr<uint8_t>();

    if (pkt->isError()) {
-        if (!suppress_func_warnings) {
-          warn("Functional %s access failed at %#x\n",
-               pkt->isWrite() ? "write" : "read", req->getPaddr());
+        if (!functional || !suppressFuncWarnings) {
+            warn("%s access failed at %#x\n",
+                 pkt->isWrite() ? "Write" : "Read", req->getPaddr());
        }
    } else {
        if (pkt->isRead()) {
-            if (memcmp(pkt_data, data, pkt->getSize()) != 0) {
+            uint8_t ref_data = referenceData[req->getPaddr()];
+            if (pkt_data[0] != ref_data) {
                panic("%s: read of %x (blk %x) @ cycle %d "
                      "returns %x, expected %x\n", name(),
-                      req->getPaddr(), blockAddr(req->getPaddr()), curTick(),
-                      *pkt_data, *data);
+                      req->getPaddr(), blockAlign(req->getPaddr()), curTick(),
+                      pkt_data[0], ref_data);
            }

            numReads++;
@ -209,17 +177,21 @@ MemTest::completeRequest(PacketPtr pkt)
                exitSimLoop("maximum number of loads reached");
        } else {
            assert(pkt->isWrite());
-            funcProxy.writeBlob(req->getPaddr(), pkt_data, req->getSize());
+
+            // update the reference data
+            referenceData[req->getPaddr()] = pkt_data[0];
            numWrites++;
            numWritesStat++;
        }
    }

-    noResponseCycles = 0;
-    delete state;
-    delete [] data;
    delete pkt->req;
+
+    // the packet will delete the data
    delete pkt;
+
+    // finally shift the response timeout forward
+    reschedule(noResponseEvent, clockEdge(progressCheck), true);
 }

 void
@ -236,151 +208,126 @@ MemTest::regStats()
        .name(name() + ".num_writes")
        .desc("number of write accesses completed")
        ;
-
-    numCopiesStat
-        .name(name() + ".num_copies")
-        .desc("number of copy accesses completed")
-        ;
 }

 void
 MemTest::tick()
 {
-    if (!tickEvent.scheduled())
-        schedule(tickEvent, clockEdge(Cycles(1)));
+    // we should never tick if we are waiting for a retry
+    assert(!retryPkt);

-    if (++noResponseCycles >= 500000) {
-        if (issueDmas) {
-            cerr << "DMA tester ";
-        }
-        cerr << name() << ": deadlocked at cycle " << curTick() << endl;
-        fatal("");
-    }
-
-    if (accessRetry || (issueDmas && dmaOutstanding)) {
-        DPRINTF(MemTest, "MemTester waiting on accessRetry or DMA response\n");
-        return;
-    }
-
-    //make new request
+    // create a new request
    unsigned cmd = random_mt.random(0, 100);
-    unsigned offset = random_mt.random<unsigned>(0, size - 1);
-    unsigned base = random_mt.random(0, 1);
-    uint64_t data = random_mt.random<uint64_t>();
-    unsigned access_size = random_mt.random(0, 3);
+    uint8_t data = random_mt.random<uint8_t>();
    bool uncacheable = random_mt.random(0, 100) < percentUncacheable;
-
-    unsigned dma_access_size = random_mt.random(0, 3);
-
-    //If we aren't doing copies, use id as offset, and do a false sharing
-    //mem tester
-    //We can eliminate the lower bits of the offset, and then use the id
-    //to offset within the blks
-    offset = blockAddr(offset);
-    offset += id;
-    access_size = 0;
-    dma_access_size = 0;
-
+    unsigned base = random_mt.random(0, 1);
    Request::Flags flags;
    Addr paddr;

-    if (uncacheable) {
-        flags.set(Request::UNCACHEABLE);
-        paddr = uncacheAddr + offset;
-    } else  {
-        paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
-    }
+    // generate a unique address
+    do {
+        unsigned offset = random_mt.random<unsigned>(0, size - 1);

-    // For now we only allow one outstanding request per address
-    // per tester This means we assume CPU does write forwarding
-    // to reads that alias something in the cpu store buffer.
-    if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
-        return;
-    }
+        // use the tester id as offset within the block for false sharing
+        offset = blockAlign(offset);
+        offset += id;
+
+        if (uncacheable) {
+            flags.set(Request::UNCACHEABLE);
+            paddr = uncacheAddr + offset;
+        } else  {
+            paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
+        }
+    } while (outstandingAddrs.find(paddr) != outstandingAddrs.end());

    bool do_functional = (random_mt.random(0, 100) < percentFunctional) &&
        !uncacheable;
-    Request *req = nullptr;
-    uint8_t *result = new uint8_t[8];
+    Request *req = new Request(paddr, 1, flags, masterId);
+    req->setThreadContext(id, 0);

-    if (issueDmas) {
-        paddr &= ~((1 << dma_access_size) - 1);
-        req = new Request(paddr, 1 << dma_access_size, flags, masterId);
-        req->setThreadContext(id,0);
-    } else {
-        paddr &= ~((1 << access_size) - 1);
-        req = new Request(paddr, 1 << access_size, flags, masterId);
-        req->setThreadContext(id,0);
-    }
-    assert(req->getSize() == 1);
+    outstandingAddrs.insert(paddr);
+
+    // sanity check
+    panic_if(outstandingAddrs.size() > 100,
+             "Tester %s has more than 100 outstanding requests\n", name());
+
+    PacketPtr pkt = nullptr;
+    uint8_t *pkt_data = new uint8_t[1];

    if (cmd < percentReads) {
-        // read
-        outstandingAddrs.insert(paddr);
-
-        // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin
-        funcProxy.readBlob(req->getPaddr(), result, req->getSize());
+        // start by ensuring there is a reference value if we have not
+        // seen this address before
+        uint8_t M5_VAR_USED ref_data = 0;
+        auto ref = referenceData.find(req->getPaddr());
+        if (ref == referenceData.end()) {
+            referenceData[req->getPaddr()] = 0;
+        } else {
+            ref_data = ref->second;
+        }

        DPRINTF(MemTest,
-                "id %d initiating %sread at addr %x (blk %x) expecting %x\n",
-                id, do_functional ? "functional " : "", req->getPaddr(),
-                blockAddr(req->getPaddr()), *result);
-
-        PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
-        pkt->dataDynamic(new uint8_t[req->getSize()]);
-        MemTestSenderState *state = new MemTestSenderState(result);
-        pkt->senderState = state;
-
-        if (do_functional) {
-            assert(pkt->needsResponse());
-            pkt->setSuppressFuncError();
-            cachePort.sendFunctional(pkt);
-            completeRequest(pkt);
-        } else {
-            sendPkt(pkt);
-        }
-    } else {
-        // write
-        outstandingAddrs.insert(paddr);
-
-        DPRINTF(MemTest, "initiating %swrite at addr %x (blk %x) value %x\n",
+                "Initiating %sread at addr %x (blk %x) expecting %x\n",
                do_functional ? "functional " : "", req->getPaddr(),
-                blockAddr(req->getPaddr()), data & 0xff);
+                blockAlign(req->getPaddr()), ref_data);

-        PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
-        uint8_t *pkt_data = new uint8_t[req->getSize()];
+        pkt = new Packet(req, MemCmd::ReadReq);
        pkt->dataDynamic(pkt_data);
-        memcpy(pkt_data, &data, req->getSize());
-        MemTestSenderState *state = new MemTestSenderState(result);
-        pkt->senderState = state;
+    } else {
+        DPRINTF(MemTest, "Initiating %swrite at addr %x (blk %x) value %x\n",
+                do_functional ? "functional " : "", req->getPaddr(),
+                blockAlign(req->getPaddr()), data);

-        if (do_functional) {
-            pkt->setSuppressFuncError();
-            cachePort.sendFunctional(pkt);
-            completeRequest(pkt);
-        } else {
-            sendPkt(pkt);
-        }
+        pkt = new Packet(req, MemCmd::WriteReq);
+        pkt->dataDynamic(pkt_data);
+        pkt_data[0] = data;
+    }
+
+    // there is no point in ticking if we are waiting for a retry
+    bool keep_ticking = true;
+    if (do_functional) {
+        pkt->setSuppressFuncError();
+        port.sendFunctional(pkt);
+        completeRequest(pkt, true);
+    } else {
+        keep_ticking = sendPkt(pkt);
+    }
+
+    if (keep_ticking) {
+        // schedule the next tick
+        schedule(tickEvent, clockEdge(interval));
+
+        // finally shift the timeout for sending of requests forwards
+        // as we have successfully sent a packet
+        reschedule(noRequestEvent, clockEdge(progressCheck), true);
+    } else {
+        DPRINTF(MemTest, "Waiting for retry\n");
    }
 }

 void
-MemTest::doRetry()
+MemTest::noRequest()
 {
-    if (cachePort.sendTimingReq(retryPkt)) {
-        DPRINTF(MemTest, "accessRetry setting to false\n");
-        accessRetry = false;
-        retryPkt = NULL;
-    }
+    panic("%s did not send a request for %d cycles", name(), progressCheck);
 }

-
 void
-MemTest::printAddr(Addr a)
+MemTest::noResponse()
 {
-    cachePort.printAddr(a);
+    panic("%s did not see a response for %d cycles", name(), progressCheck);
 }

+void
+MemTest::recvRetry()
+{
+    assert(retryPkt);
+    if (port.sendTimingReq(retryPkt)) {
+        DPRINTF(MemTest, "Proceeding after successful retry\n");
+
+        retryPkt = nullptr;
+        // kick things into action again
+        schedule(tickEvent, clockEdge(interval));
+    }
+}

 MemTest *
 MemTestParams::create()
--- a/src/cpu/testers/memtest/memtest.hh
+++ b/src/cpu/testers/memtest/memtest.hh
@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
@ -27,166 +39,152 @@
 *
 * Authors: Erik Hallnor
 *          Steve Reinhardt
+ *          Andreas Hansson
 */

 #ifndef __CPU_MEMTEST_MEMTEST_HH__
 #define __CPU_MEMTEST_MEMTEST_HH__

 #include <set>
+#include <unordered_map>

 #include "base/statistics.hh"
 #include "mem/mem_object.hh"
-#include "mem/port.hh"
-#include "mem/port_proxy.hh"
 #include "params/MemTest.hh"
 #include "sim/eventq.hh"
-#include "sim/sim_exit.hh"
-#include "sim/sim_object.hh"
 #include "sim/stats.hh"

-class Packet;
+/**
+ * The MemTest class tests a cache coherent memory system by
+ * generating false sharing and verifying the read data against a
+ * reference updated on the completion of writes. Each tester reads
+ * and writes a specific byte in a cache line, as determined by its
+ * unique id. Thus, all requests issued by the MemTest instance are a
+ * single byte and a specific address is only ever touched by a single
+ * tester.
+ *
+ * In addition to verifying the data, the tester also has timeouts for
+ * both requests and responses, thus checking that the memory-system
+ * is making progress.
+ */
 class MemTest : public MemObject
 {
+
  public:
+
    typedef MemTestParams Params;
    MemTest(const Params *p);

-    virtual void init();
-
-    // register statistics
    virtual void regStats();

-    // main simulation loop (one cycle)
-    void tick();
-
    virtual BaseMasterPort &getMasterPort(const std::string &if_name,
                                          PortID idx = InvalidPortID);

-    /**
-     * Print state of address in memory system via PrintReq (for
-     * debugging).
-     */
-    void printAddr(Addr a);
-
  protected:
-    class TickEvent : public Event
-    {
-      private:
-        MemTest *cpu;

-      public:
-        TickEvent(MemTest *c) : Event(CPU_Tick_Pri), cpu(c) {}
-        void process() { cpu->tick(); }
-        virtual const char *description() const { return "MemTest tick"; }
-    };
+    void tick();

-    TickEvent tickEvent;
+    EventWrapper<MemTest, &MemTest::tick> tickEvent;
+
+    void noRequest();
+
+    EventWrapper<MemTest, &MemTest::noRequest> noRequestEvent;
+
+    void noResponse();
+
+    EventWrapper<MemTest, &MemTest::noResponse> noResponseEvent;

    class CpuPort : public MasterPort
    {
-        MemTest *memtest;
+        MemTest &memtest;

      public:

-        CpuPort(const std::string &_name, MemTest *_memtest)
-            : MasterPort(_name, _memtest), memtest(_memtest)
+        CpuPort(const std::string &_name, MemTest &_memtest)
+            : MasterPort(_name, &_memtest), memtest(_memtest)
        { }

      protected:

-        virtual bool recvTimingResp(PacketPtr pkt);
+        bool recvTimingResp(PacketPtr pkt);

-        virtual void recvTimingSnoopReq(PacketPtr pkt) { }
+        void recvTimingSnoopReq(PacketPtr pkt) { }

-        virtual Tick recvAtomicSnoop(PacketPtr pkt) { return 0; }
+        void recvFunctionalSnoop(PacketPtr pkt) { }

-        virtual void recvFunctionalSnoop(PacketPtr pkt) { }
+        Tick recvAtomicSnoop(PacketPtr pkt) { return 0; }

-        virtual void recvRetry();
+        void recvRetry();
    };

-    CpuPort cachePort;
-    CpuPort funcPort;
-    PortProxy funcProxy;
-
-    class MemTestSenderState : public Packet::SenderState
-    {
-      public:
-        /** Constructor. */
-        MemTestSenderState(uint8_t *_data)
-            : data(_data)
-        { }
-
-        // Hold onto data pointer
-        uint8_t *data;
-    };
+    CpuPort port;

    PacketPtr retryPkt;

-    bool accessRetry;
-    
-    //
-    // The dmaOustanding flag enforces only one dma at a time
-    //
-    bool dmaOutstanding;
+    const unsigned size;

-    unsigned size;              // size of testing memory region
+    const Cycles interval;

-    unsigned percentReads;      // target percentage of read accesses
-    unsigned percentFunctional; // target percentage of functional accesses
-    unsigned percentUncacheable;
-
-    bool issueDmas;
+    const unsigned percentReads;
+    const unsigned percentFunctional;
+    const unsigned percentUncacheable;

    /** Request id for all generated traffic */
    MasterID masterId;

-    int id;
+    unsigned int id;

-    std::set<unsigned> outstandingAddrs;
+    std::set<Addr> outstandingAddrs;

-    unsigned blockSize;
+    // store the expected value for the addresses we have touched
+    std::unordered_map<Addr, uint8_t> referenceData;

-    Addr blockAddrMask;
+    const unsigned blockSize;

-    Addr blockAddr(Addr addr)
+    const Addr blockAddrMask;
+
+    /**
+     * Get the block aligned address.
+     *
+     * @param addr Address to align
+     * @return The block aligned address
+     */
+    Addr blockAlign(Addr addr) const
    {
        return (addr & ~blockAddrMask);
    }

-    Addr traceBlockAddr;
-
-    Addr baseAddr1;             // fix this to option
-    Addr baseAddr2;             // fix this to option
+    Addr baseAddr1;
+    Addr baseAddr2;
    Addr uncacheAddr;

-    unsigned progressInterval;  // frequency of progress reports
+    const unsigned progressInterval;  // frequency of progress reports
+    const Cycles progressCheck;
    Tick nextProgressMessage;   // access # for next progress report

-    unsigned percentSourceUnaligned;
-    unsigned percentDestUnaligned;
-
-    Tick noResponseCycles;
-
    uint64_t numReads;
    uint64_t numWrites;
-    uint64_t maxLoads;
+    const uint64_t maxLoads;

-    bool atomic;
-    bool suppress_func_warnings;
+    const bool atomic;
+
+    const bool suppressFuncWarnings;

    Stats::Scalar numReadsStat;
    Stats::Scalar numWritesStat;
-    Stats::Scalar numCopiesStat;

-    // called by MemCompleteEvent::process()
-    void completeRequest(PacketPtr pkt);
+    /**
+     * Complete a request by checking the response.
+     *
+     * @param pkt Response packet
+     * @param functional Whether the access was functional or not
+     */
+    void completeRequest(PacketPtr pkt, bool functional = false);

-    void sendPkt(PacketPtr pkt);
+    bool sendPkt(PacketPtr pkt);

-    void doRetry();
+    void recvRetry();

-    friend class MemCompleteEvent;
 };

 #endif // __CPU_MEMTEST_MEMTEST_HH__
--- a/tests/configs/memtest-filter.py
+++ b/tests/configs/memtest-filter.py
@ -36,8 +36,7 @@ nb_cores = 8
 cpus = [ MemTest() for i in xrange(nb_cores) ]

 # system simulated
-system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False),
-                funcbus = NoncoherentXBar(),
+system = System(cpu = cpus,
                physmem = SimpleMemory(),
                membus = CoherentXBar(width=16, snoop_filter = SnoopFilter()))
 # Dummy voltage domain for all our clock domains
@ -63,15 +62,11 @@ for cpu in cpus:
    # All cpus are associated with cpu_clk_domain
    cpu.clk_domain = system.cpu_clk_domain
    cpu.l1c = L1Cache(size = '32kB', assoc = 4)
-    cpu.l1c.cpu_side = cpu.test
+    cpu.l1c.cpu_side = cpu.port
    cpu.l1c.mem_side = system.toL2Bus.slave
-    system.funcbus.slave = cpu.functional

 system.system_port = system.membus.slave

-# connect reference memory to funcbus
-system.funcmem.port = system.funcbus.master
-
 # connect memory to membus
 system.physmem.port = system.membus.master

@ -82,6 +77,3 @@ system.physmem.port = system.membus.master

 root = Root( full_system = False, system = system )
 root.system.mem_mode = 'timing'
-#root.trace.flags="Cache CachePort MemoryAccess"
-#root.trace.cycle=1
-
--- a/tests/configs/memtest-ruby.py
+++ b/tests/configs/memtest-ruby.py
@ -70,7 +70,7 @@ options.ports=32
 nb_cores = 8

 # ruby does not support atomic, functional, or uncacheable accesses
-cpus = [ MemTest(atomic=False, percent_functional=50,
+cpus = [ MemTest(percent_functional=50,
                 percent_uncacheable=0, suppress_func_warnings=True) \
         for i in xrange(nb_cores) ]

@ -78,9 +78,7 @@ cpus = [ MemTest(atomic=False, percent_functional=50,
 options.num_cpus = nb_cores
 
 # system simulated
-system = System(cpu = cpus,
-                funcmem = SimpleMemory(in_addr_map = False),
-                funcbus = NoncoherentXBar())
+system = System(cpu = cpus)
 # Dummy voltage domain for all our clock domains
 system.voltage_domain = VoltageDomain()
 system.clk_domain = SrcClockDomain(clock = '1GHz',
@ -107,21 +105,17 @@ assert(len(cpus) == len(system.ruby._cpu_ports))

 for (i, ruby_port) in enumerate(system.ruby._cpu_ports):
     #
-     # Tie the cpu test and functional ports to the ruby cpu ports and
+     # Tie the cpu port to the ruby cpu ports and
     # physmem, respectively
     #
-     cpus[i].test = ruby_port.slave
-     cpus[i].functional = system.funcbus.slave
-     
+     cpus[i].port = ruby_port.slave
+
     #
     # Since the memtester is incredibly bursty, increase the deadlock
     # threshold to 1 million cycles
     #
     ruby_port.deadlock_threshold = 1000000

-# connect reference memory to funcbus
-system.funcmem.port = system.funcbus.master
-
 # -----------------------
 # run simulation
 # -----------------------
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@ -36,8 +36,7 @@ nb_cores = 8
 cpus = [ MemTest() for i in xrange(nb_cores) ]

 # system simulated
-system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False),
-                funcbus = NoncoherentXBar(),
+system = System(cpu = cpus,
                physmem = SimpleMemory(),
                membus = CoherentXBar(width=16))
 # Dummy voltage domain for all our clock domains
@ -62,15 +61,11 @@ for cpu in cpus:
    # All cpus are associated with cpu_clk_domain
    cpu.clk_domain = system.cpu_clk_domain
    cpu.l1c = L1Cache(size = '32kB', assoc = 4)
-    cpu.l1c.cpu_side = cpu.test
+    cpu.l1c.cpu_side = cpu.port
    cpu.l1c.mem_side = system.toL2Bus.slave
-    system.funcbus.slave = cpu.functional

 system.system_port = system.membus.slave

-# connect reference memory to funcbus
-system.funcmem.port = system.funcbus.master
-
 # connect memory to membus
 system.physmem.port = system.membus.master

@ -81,6 +76,4 @@ system.physmem.port = system.membus.master

 root = Root( full_system = False, system = system )
 root.system.mem_mode = 'timing'
-#root.trace.flags="Cache CachePort MemoryAccess"
-#root.trace.cycle=1