cpu: Tidy up the MemTest and make false sharing more obvious

The MemTest class really only tests false sharing, and as such there
was a lot of old cruft that could be removed. This patch cleans up the
tester and also makes its assumptions clearer. As part of this
simplification, the reference functional memory is also removed.

The regression configs using MemTest are updated to reflect the
changes, and the stats will be bumped in a separate patch. The example
config will be updated in a separate patch due to more extensive
re-work.

In a follow-on patch a new tester will be introduced that uses the
MemChecker to implement true sharing.
This commit is contained in:
Andreas Hansson 2015-02-11 10:23:28 -05:00
parent 550c318490
commit 6563ec8634
7 changed files with 286 additions and 352 deletions

View file

@ -124,7 +124,7 @@ else:
# build a list of prototypes, one for each level of treespec, starting # build a list of prototypes, one for each level of treespec, starting
# at the end (last entry is tester objects) # at the end (last entry is tester objects)
prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, prototypes = [ MemTest(max_loads=options.maxloads,
percent_functional=options.functional, percent_functional=options.functional,
percent_uncacheable=options.uncacheable, percent_uncacheable=options.uncacheable,
progress_interval=options.progress) ] progress_interval=options.progress) ]
@ -146,12 +146,9 @@ for scale in treespec[:-2]:
prototypes.insert(0, next) prototypes.insert(0, next)
# system simulated # system simulated
system = System(funcmem = SimpleMemory(in_addr_map = False), system = System(physmem = SimpleMemory(latency = "100ns"),
funcbus = NoncoherentXBar(),
physmem = SimpleMemory(latency = "100ns"),
cache_line_size = block_size) cache_line_size = block_size)
system.voltage_domain = VoltageDomain(voltage = '1V') system.voltage_domain = VoltageDomain(voltage = '1V')
system.clk_domain = SrcClockDomain(clock = options.sys_clock, system.clk_domain = SrcClockDomain(clock = options.sys_clock,
@ -182,14 +179,10 @@ def make_level(spec, prototypes, attach_obj, attach_port):
# we just built the MemTest objects # we just built the MemTest objects
parent.cpu = objs parent.cpu = objs
for t in objs: for t in objs:
t.test = getattr(attach_obj, attach_port) t.port = getattr(attach_obj, attach_port)
t.functional = system.funcbus.slave
make_level(treespec, prototypes, system.physmem, "port") make_level(treespec, prototypes, system.physmem, "port")
# connect reference memory to funcbus
system.funcbus.master = system.funcmem.port
# ----------------------- # -----------------------
# run simulation # run simulation
# ----------------------- # -----------------------
@ -202,7 +195,7 @@ else:
# The system port is never used in the tester so merely connect it # The system port is never used in the tester so merely connect it
# to avoid problems # to avoid problems
root.system.system_port = root.system.funcbus.slave root.system.system_port = root.system.physmem.cpu_side_bus.slave
# Not much point in this being higher than the L1 latency # Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns') m5.ticks.setGlobalFrequency('1ns')

View file

@ -1,3 +1,15 @@
# Copyright (c) 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2005-2007 The Regents of The University of Michigan # Copyright (c) 2005-2007 The Regents of The University of Michigan
# All rights reserved. # All rights reserved.
# #
@ -25,6 +37,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# #
# Authors: Nathan Binkert # Authors: Nathan Binkert
# Andreas Hansson
from MemObject import MemObject from MemObject import MemObject
from m5.params import * from m5.params import *
@ -33,26 +46,30 @@ from m5.proxy import *
class MemTest(MemObject): class MemTest(MemObject):
type = 'MemTest' type = 'MemTest'
cxx_header = "cpu/testers/memtest/memtest.hh" cxx_header = "cpu/testers/memtest/memtest.hh"
max_loads = Param.Counter(0, "number of loads to execute")
atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n") # Interval of packet injection, the size of the memory range
memory_size = Param.Int(65536, "memory size") # touched, and an optional stop condition
percent_dest_unaligned = Param.Percent(50, interval = Param.Cycles(1, "Interval between request packets")
"percent of copy dest address that are unaligned") size = Param.Unsigned(65536, "Size of memory region to use (bytes)")
percent_reads = Param.Percent(65, "target read percentage") max_loads = Param.Counter(0, "Number of loads to execute before exiting")
issue_dmas = Param.Bool(False, "this memtester should issue dma requests")
percent_source_unaligned = Param.Percent(50, # Control the mix of packets and if functional accesses are part of
"percent of copy source address that are unaligned") # the mix or not
percent_functional = Param.Percent(50, "percent of access that are functional") percent_reads = Param.Percent(65, "Percentage reads")
percent_uncacheable = Param.Percent(10, percent_functional = Param.Percent(50, "Percentage functional accesses")
"target uncacheable percentage") percent_uncacheable = Param.Percent(10, "Percentage uncacheable")
# Determine how often to print progress messages and what timeout
# to use for checking progress of both requests and responses
progress_interval = Param.Counter(1000000, progress_interval = Param.Counter(1000000,
"progress report interval (in accesses)") "Progress report interval (in accesses)")
trace_addr = Param.Addr(0, "address to trace") progress_check = Param.Cycles(5000000, "Cycles before exiting " \
"due to lack of progress")
test = MasterPort("Port to the memory system to test") port = MasterPort("Port to the memory system")
functional = MasterPort("Port to the functional memory " \ system = Param.System(Parent.any, "System this tester is part of")
"used for verification")
suppress_func_warnings = Param.Bool(False,
"suppress warnings when functional accesses fail.\n")
sys = Param.System(Parent.any, "System Parameter")
# Add the ability to suppress error responses on functional
# accesses as Ruby needs this
suppress_func_warnings = Param.Bool(False, "Suppress warnings when "\
"functional accesses fail.")

View file

@ -1,4 +1,16 @@
/* /*
* Copyright (c) 2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2002-2005 The Regents of The University of Michigan
* All rights reserved. * All rights reserved.
* *
@ -27,173 +39,129 @@
* *
* Authors: Erik Hallnor * Authors: Erik Hallnor
* Steve Reinhardt * Steve Reinhardt
* Andreas Hansson
*/ */
// FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded
#include <iomanip>
#include <set>
#include <string>
#include <vector>
#include "base/misc.hh"
#include "base/random.hh" #include "base/random.hh"
#include "base/statistics.hh" #include "base/statistics.hh"
#include "cpu/testers/memtest/memtest.hh" #include "cpu/testers/memtest/memtest.hh"
#include "debug/MemTest.hh" #include "debug/MemTest.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/packet.hh" #include "sim/sim_exit.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "sim/sim_events.hh"
#include "sim/stats.hh" #include "sim/stats.hh"
#include "sim/system.hh" #include "sim/system.hh"
using namespace std; using namespace std;
int TESTER_ALLOCATOR=0; unsigned int TESTER_ALLOCATOR = 0;
bool bool
MemTest::CpuPort::recvTimingResp(PacketPtr pkt) MemTest::CpuPort::recvTimingResp(PacketPtr pkt)
{ {
memtest->completeRequest(pkt); memtest.completeRequest(pkt);
return true; return true;
} }
void void
MemTest::CpuPort::recvRetry() MemTest::CpuPort::recvRetry()
{ {
memtest->doRetry(); memtest.recvRetry();
} }
void bool
MemTest::sendPkt(PacketPtr pkt) { MemTest::sendPkt(PacketPtr pkt) {
if (atomic) { if (atomic) {
cachePort.sendAtomic(pkt); port.sendAtomic(pkt);
completeRequest(pkt); completeRequest(pkt);
}
else if (!cachePort.sendTimingReq(pkt)) {
DPRINTF(MemTest, "accessRetry setting to true\n");
//
// dma requests should never be retried
//
if (issueDmas) {
panic("Nacked DMA requests are not supported\n");
}
accessRetry = true;
retryPkt = pkt;
} else { } else {
if (issueDmas) { if (!port.sendTimingReq(pkt)) {
dmaOutstanding = true; retryPkt = pkt;
return false;
} }
} }
return true;
} }
MemTest::MemTest(const Params *p) MemTest::MemTest(const Params *p)
: MemObject(p), : MemObject(p),
tickEvent(this), tickEvent(this),
cachePort("test", this), noRequestEvent(this),
funcPort("functional", this), noResponseEvent(this),
funcProxy(funcPort, p->sys->cacheLineSize()), port("port", *this),
retryPkt(NULL), retryPkt(nullptr),
// mainMem(main_mem), size(p->size),
// checkMem(check_mem), interval(p->interval),
size(p->memory_size),
percentReads(p->percent_reads), percentReads(p->percent_reads),
percentFunctional(p->percent_functional), percentFunctional(p->percent_functional),
percentUncacheable(p->percent_uncacheable), percentUncacheable(p->percent_uncacheable),
issueDmas(p->issue_dmas), masterId(p->system->getMasterId(name())),
masterId(p->sys->getMasterId(name())), blockSize(p->system->cacheLineSize()),
blockSize(p->sys->cacheLineSize()), blockAddrMask(blockSize - 1),
progressInterval(p->progress_interval), progressInterval(p->progress_interval),
progressCheck(p->progress_check),
nextProgressMessage(p->progress_interval), nextProgressMessage(p->progress_interval),
percentSourceUnaligned(p->percent_source_unaligned),
percentDestUnaligned(p->percent_dest_unaligned),
maxLoads(p->max_loads), maxLoads(p->max_loads),
atomic(p->atomic), atomic(p->system->isAtomicMode()),
suppress_func_warnings(p->suppress_func_warnings) suppressFuncWarnings(p->suppress_func_warnings)
{ {
id = TESTER_ALLOCATOR++; id = TESTER_ALLOCATOR++;
fatal_if(id >= blockSize, "Too many testers, only %d allowed\n",
blockSize - 1);
// Needs to be masked off once we know the block size.
traceBlockAddr = p->trace_addr;
baseAddr1 = 0x100000; baseAddr1 = 0x100000;
baseAddr2 = 0x400000; baseAddr2 = 0x400000;
uncacheAddr = 0x800000; uncacheAddr = 0x800000;
blockAddrMask = blockSize - 1;
traceBlockAddr = blockAddr(traceBlockAddr);
// set up counters // set up counters
noResponseCycles = 0;
numReads = 0; numReads = 0;
numWrites = 0; numWrites = 0;
schedule(tickEvent, 0);
accessRetry = false; // kick things into action
dmaOutstanding = false; schedule(tickEvent, curTick());
schedule(noRequestEvent, clockEdge(progressCheck));
schedule(noResponseEvent, clockEdge(progressCheck));
} }
BaseMasterPort & BaseMasterPort &
MemTest::getMasterPort(const std::string &if_name, PortID idx) MemTest::getMasterPort(const std::string &if_name, PortID idx)
{ {
if (if_name == "functional") if (if_name == "port")
return funcPort; return port;
else if (if_name == "test")
return cachePort;
else else
return MemObject::getMasterPort(if_name, idx); return MemObject::getMasterPort(if_name, idx);
} }
void void
MemTest::init() MemTest::completeRequest(PacketPtr pkt, bool functional)
{
// initial memory contents for both physical memory and functional
// memory should be 0; no need to initialize them.
}
void
MemTest::completeRequest(PacketPtr pkt)
{ {
Request *req = pkt->req; Request *req = pkt->req;
assert(req->getSize() == 1);
if (issueDmas) { // this address is no longer outstanding
dmaOutstanding = false; auto remove_addr = outstandingAddrs.find(req->getPaddr());
} assert(remove_addr != outstandingAddrs.end());
outstandingAddrs.erase(remove_addr);
DPRINTF(MemTest, "completing %s at address %x (blk %x) %s\n", DPRINTF(MemTest, "Completing %s at address %x (blk %x) %s\n",
pkt->isWrite() ? "write" : "read", pkt->isWrite() ? "write" : "read",
req->getPaddr(), blockAddr(req->getPaddr()), req->getPaddr(), blockAlign(req->getPaddr()),
pkt->isError() ? "error" : "success"); pkt->isError() ? "error" : "success");
MemTestSenderState *state = const uint8_t *pkt_data = pkt->getConstPtr<uint8_t>();
safe_cast<MemTestSenderState *>(pkt->senderState);
uint8_t *data = state->data;
// @todo: This should really be a const pointer
uint8_t *pkt_data = pkt->getPtr<uint8_t>();
//Remove the address from the list of outstanding
std::set<unsigned>::iterator removeAddr =
outstandingAddrs.find(req->getPaddr());
assert(removeAddr != outstandingAddrs.end());
outstandingAddrs.erase(removeAddr);
if (pkt->isError()) { if (pkt->isError()) {
if (!suppress_func_warnings) { if (!functional || !suppressFuncWarnings) {
warn("Functional %s access failed at %#x\n", warn("%s access failed at %#x\n",
pkt->isWrite() ? "write" : "read", req->getPaddr()); pkt->isWrite() ? "Write" : "Read", req->getPaddr());
} }
} else { } else {
if (pkt->isRead()) { if (pkt->isRead()) {
if (memcmp(pkt_data, data, pkt->getSize()) != 0) { uint8_t ref_data = referenceData[req->getPaddr()];
if (pkt_data[0] != ref_data) {
panic("%s: read of %x (blk %x) @ cycle %d " panic("%s: read of %x (blk %x) @ cycle %d "
"returns %x, expected %x\n", name(), "returns %x, expected %x\n", name(),
req->getPaddr(), blockAddr(req->getPaddr()), curTick(), req->getPaddr(), blockAlign(req->getPaddr()), curTick(),
*pkt_data, *data); pkt_data[0], ref_data);
} }
numReads++; numReads++;
@ -209,17 +177,21 @@ MemTest::completeRequest(PacketPtr pkt)
exitSimLoop("maximum number of loads reached"); exitSimLoop("maximum number of loads reached");
} else { } else {
assert(pkt->isWrite()); assert(pkt->isWrite());
funcProxy.writeBlob(req->getPaddr(), pkt_data, req->getSize());
// update the reference data
referenceData[req->getPaddr()] = pkt_data[0];
numWrites++; numWrites++;
numWritesStat++; numWritesStat++;
} }
} }
noResponseCycles = 0;
delete state;
delete [] data;
delete pkt->req; delete pkt->req;
// the packet will delete the data
delete pkt; delete pkt;
// finally shift the response timeout forward
reschedule(noResponseEvent, clockEdge(progressCheck), true);
} }
void void
@ -236,151 +208,126 @@ MemTest::regStats()
.name(name() + ".num_writes") .name(name() + ".num_writes")
.desc("number of write accesses completed") .desc("number of write accesses completed")
; ;
numCopiesStat
.name(name() + ".num_copies")
.desc("number of copy accesses completed")
;
} }
void void
MemTest::tick() MemTest::tick()
{ {
if (!tickEvent.scheduled()) // we should never tick if we are waiting for a retry
schedule(tickEvent, clockEdge(Cycles(1))); assert(!retryPkt);
if (++noResponseCycles >= 500000) { // create a new request
if (issueDmas) {
cerr << "DMA tester ";
}
cerr << name() << ": deadlocked at cycle " << curTick() << endl;
fatal("");
}
if (accessRetry || (issueDmas && dmaOutstanding)) {
DPRINTF(MemTest, "MemTester waiting on accessRetry or DMA response\n");
return;
}
//make new request
unsigned cmd = random_mt.random(0, 100); unsigned cmd = random_mt.random(0, 100);
unsigned offset = random_mt.random<unsigned>(0, size - 1); uint8_t data = random_mt.random<uint8_t>();
unsigned base = random_mt.random(0, 1);
uint64_t data = random_mt.random<uint64_t>();
unsigned access_size = random_mt.random(0, 3);
bool uncacheable = random_mt.random(0, 100) < percentUncacheable; bool uncacheable = random_mt.random(0, 100) < percentUncacheable;
unsigned base = random_mt.random(0, 1);
unsigned dma_access_size = random_mt.random(0, 3);
//If we aren't doing copies, use id as offset, and do a false sharing
//mem tester
//We can eliminate the lower bits of the offset, and then use the id
//to offset within the blks
offset = blockAddr(offset);
offset += id;
access_size = 0;
dma_access_size = 0;
Request::Flags flags; Request::Flags flags;
Addr paddr; Addr paddr;
// generate a unique address
do {
unsigned offset = random_mt.random<unsigned>(0, size - 1);
// use the tester id as offset within the block for false sharing
offset = blockAlign(offset);
offset += id;
if (uncacheable) { if (uncacheable) {
flags.set(Request::UNCACHEABLE); flags.set(Request::UNCACHEABLE);
paddr = uncacheAddr + offset; paddr = uncacheAddr + offset;
} else { } else {
paddr = ((base) ? baseAddr1 : baseAddr2) + offset; paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
} }
} while (outstandingAddrs.find(paddr) != outstandingAddrs.end());
// For now we only allow one outstanding request per address
// per tester. This means we assume the CPU does write forwarding
// to reads that alias something in the cpu store buffer.
if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
return;
}
bool do_functional = (random_mt.random(0, 100) < percentFunctional) && bool do_functional = (random_mt.random(0, 100) < percentFunctional) &&
!uncacheable; !uncacheable;
Request *req = nullptr; Request *req = new Request(paddr, 1, flags, masterId);
uint8_t *result = new uint8_t[8]; req->setThreadContext(id, 0);
if (issueDmas) { outstandingAddrs.insert(paddr);
paddr &= ~((1 << dma_access_size) - 1);
req = new Request(paddr, 1 << dma_access_size, flags, masterId); // sanity check
req->setThreadContext(id,0); panic_if(outstandingAddrs.size() > 100,
} else { "Tester %s has more than 100 outstanding requests\n", name());
paddr &= ~((1 << access_size) - 1);
req = new Request(paddr, 1 << access_size, flags, masterId); PacketPtr pkt = nullptr;
req->setThreadContext(id,0); uint8_t *pkt_data = new uint8_t[1];
}
assert(req->getSize() == 1);
if (cmd < percentReads) { if (cmd < percentReads) {
// read // start by ensuring there is a reference value if we have not
outstandingAddrs.insert(paddr); // seen this address before
uint8_t M5_VAR_USED ref_data = 0;
// ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin auto ref = referenceData.find(req->getPaddr());
funcProxy.readBlob(req->getPaddr(), result, req->getSize()); if (ref == referenceData.end()) {
referenceData[req->getPaddr()] = 0;
} else {
ref_data = ref->second;
}
DPRINTF(MemTest, DPRINTF(MemTest,
"id %d initiating %sread at addr %x (blk %x) expecting %x\n", "Initiating %sread at addr %x (blk %x) expecting %x\n",
id, do_functional ? "functional " : "", req->getPaddr(),
blockAddr(req->getPaddr()), *result);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataDynamic(new uint8_t[req->getSize()]);
MemTestSenderState *state = new MemTestSenderState(result);
pkt->senderState = state;
if (do_functional) {
assert(pkt->needsResponse());
pkt->setSuppressFuncError();
cachePort.sendFunctional(pkt);
completeRequest(pkt);
} else {
sendPkt(pkt);
}
} else {
// write
outstandingAddrs.insert(paddr);
DPRINTF(MemTest, "initiating %swrite at addr %x (blk %x) value %x\n",
do_functional ? "functional " : "", req->getPaddr(), do_functional ? "functional " : "", req->getPaddr(),
blockAddr(req->getPaddr()), data & 0xff); blockAlign(req->getPaddr()), ref_data);
PacketPtr pkt = new Packet(req, MemCmd::WriteReq); pkt = new Packet(req, MemCmd::ReadReq);
uint8_t *pkt_data = new uint8_t[req->getSize()];
pkt->dataDynamic(pkt_data); pkt->dataDynamic(pkt_data);
memcpy(pkt_data, &data, req->getSize()); } else {
MemTestSenderState *state = new MemTestSenderState(result); DPRINTF(MemTest, "Initiating %swrite at addr %x (blk %x) value %x\n",
pkt->senderState = state; do_functional ? "functional " : "", req->getPaddr(),
blockAlign(req->getPaddr()), data);
pkt = new Packet(req, MemCmd::WriteReq);
pkt->dataDynamic(pkt_data);
pkt_data[0] = data;
}
// there is no point in ticking if we are waiting for a retry
bool keep_ticking = true;
if (do_functional) { if (do_functional) {
pkt->setSuppressFuncError(); pkt->setSuppressFuncError();
cachePort.sendFunctional(pkt); port.sendFunctional(pkt);
completeRequest(pkt); completeRequest(pkt, true);
} else { } else {
sendPkt(pkt); keep_ticking = sendPkt(pkt);
} }
if (keep_ticking) {
// schedule the next tick
schedule(tickEvent, clockEdge(interval));
// finally shift the timeout for sending of requests forwards
// as we have successfully sent a packet
reschedule(noRequestEvent, clockEdge(progressCheck), true);
} else {
DPRINTF(MemTest, "Waiting for retry\n");
} }
} }
void void
MemTest::doRetry() MemTest::noRequest()
{ {
if (cachePort.sendTimingReq(retryPkt)) { panic("%s did not send a request for %d cycles", name(), progressCheck);
DPRINTF(MemTest, "accessRetry setting to false\n");
accessRetry = false;
retryPkt = NULL;
}
} }
void void
MemTest::printAddr(Addr a) MemTest::noResponse()
{ {
cachePort.printAddr(a); panic("%s did not see a response for %d cycles", name(), progressCheck);
} }
void
MemTest::recvRetry()
{
assert(retryPkt);
if (port.sendTimingReq(retryPkt)) {
DPRINTF(MemTest, "Proceeding after successful retry\n");
retryPkt = nullptr;
// kick things into action again
schedule(tickEvent, clockEdge(interval));
}
}
MemTest * MemTest *
MemTestParams::create() MemTestParams::create()

View file

@ -1,4 +1,16 @@
/* /*
* Copyright (c) 2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2002-2005 The Regents of The University of Michigan
* All rights reserved. * All rights reserved.
* *
@ -27,166 +39,152 @@
* *
* Authors: Erik Hallnor * Authors: Erik Hallnor
* Steve Reinhardt * Steve Reinhardt
* Andreas Hansson
*/ */
#ifndef __CPU_MEMTEST_MEMTEST_HH__ #ifndef __CPU_MEMTEST_MEMTEST_HH__
#define __CPU_MEMTEST_MEMTEST_HH__ #define __CPU_MEMTEST_MEMTEST_HH__
#include <set> #include <set>
#include <unordered_map>
#include "base/statistics.hh" #include "base/statistics.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/port.hh"
#include "mem/port_proxy.hh"
#include "params/MemTest.hh" #include "params/MemTest.hh"
#include "sim/eventq.hh" #include "sim/eventq.hh"
#include "sim/sim_exit.hh"
#include "sim/sim_object.hh"
#include "sim/stats.hh" #include "sim/stats.hh"
class Packet; /**
* The MemTest class tests a cache coherent memory system by
* generating false sharing and verifying the read data against a
* reference updated on the completion of writes. Each tester reads
* and writes a specific byte in a cache line, as determined by its
* unique id. Thus, all requests issued by the MemTest instance are a
* single byte and a specific address is only ever touched by a single
* tester.
*
* In addition to verifying the data, the tester also has timeouts for
* both requests and responses, thus checking that the memory-system
* is making progress.
*/
class MemTest : public MemObject class MemTest : public MemObject
{ {
public: public:
typedef MemTestParams Params; typedef MemTestParams Params;
MemTest(const Params *p); MemTest(const Params *p);
virtual void init();
// register statistics
virtual void regStats(); virtual void regStats();
// main simulation loop (one cycle)
void tick();
virtual BaseMasterPort &getMasterPort(const std::string &if_name, virtual BaseMasterPort &getMasterPort(const std::string &if_name,
PortID idx = InvalidPortID); PortID idx = InvalidPortID);
/**
* Print state of address in memory system via PrintReq (for
* debugging).
*/
void printAddr(Addr a);
protected: protected:
class TickEvent : public Event
{
private:
MemTest *cpu;
public: void tick();
TickEvent(MemTest *c) : Event(CPU_Tick_Pri), cpu(c) {}
void process() { cpu->tick(); }
virtual const char *description() const { return "MemTest tick"; }
};
TickEvent tickEvent; EventWrapper<MemTest, &MemTest::tick> tickEvent;
void noRequest();
EventWrapper<MemTest, &MemTest::noRequest> noRequestEvent;
void noResponse();
EventWrapper<MemTest, &MemTest::noResponse> noResponseEvent;
class CpuPort : public MasterPort class CpuPort : public MasterPort
{ {
MemTest *memtest; MemTest &memtest;
public: public:
CpuPort(const std::string &_name, MemTest *_memtest) CpuPort(const std::string &_name, MemTest &_memtest)
: MasterPort(_name, _memtest), memtest(_memtest) : MasterPort(_name, &_memtest), memtest(_memtest)
{ } { }
protected: protected:
virtual bool recvTimingResp(PacketPtr pkt); bool recvTimingResp(PacketPtr pkt);
virtual void recvTimingSnoopReq(PacketPtr pkt) { } void recvTimingSnoopReq(PacketPtr pkt) { }
virtual Tick recvAtomicSnoop(PacketPtr pkt) { return 0; } void recvFunctionalSnoop(PacketPtr pkt) { }
virtual void recvFunctionalSnoop(PacketPtr pkt) { } Tick recvAtomicSnoop(PacketPtr pkt) { return 0; }
virtual void recvRetry(); void recvRetry();
}; };
CpuPort cachePort; CpuPort port;
CpuPort funcPort;
PortProxy funcProxy;
class MemTestSenderState : public Packet::SenderState
{
public:
/** Constructor. */
MemTestSenderState(uint8_t *_data)
: data(_data)
{ }
// Hold onto data pointer
uint8_t *data;
};
PacketPtr retryPkt; PacketPtr retryPkt;
bool accessRetry; const unsigned size;
// const Cycles interval;
// The dmaOutstanding flag enforces only one dma at a time
//
bool dmaOutstanding;
unsigned size; // size of testing memory region const unsigned percentReads;
const unsigned percentFunctional;
unsigned percentReads; // target percentage of read accesses const unsigned percentUncacheable;
unsigned percentFunctional; // target percentage of functional accesses
unsigned percentUncacheable;
bool issueDmas;
/** Request id for all generated traffic */ /** Request id for all generated traffic */
MasterID masterId; MasterID masterId;
int id; unsigned int id;
std::set<unsigned> outstandingAddrs; std::set<Addr> outstandingAddrs;
unsigned blockSize; // store the expected value for the addresses we have touched
std::unordered_map<Addr, uint8_t> referenceData;
Addr blockAddrMask; const unsigned blockSize;
Addr blockAddr(Addr addr) const Addr blockAddrMask;
/**
* Get the block aligned address.
*
* @param addr Address to align
* @return The block aligned address
*/
Addr blockAlign(Addr addr) const
{ {
return (addr & ~blockAddrMask); return (addr & ~blockAddrMask);
} }
Addr traceBlockAddr; Addr baseAddr1;
Addr baseAddr2;
Addr baseAddr1; // fix this to option
Addr baseAddr2; // fix this to option
Addr uncacheAddr; Addr uncacheAddr;
unsigned progressInterval; // frequency of progress reports const unsigned progressInterval; // frequency of progress reports
const Cycles progressCheck;
Tick nextProgressMessage; // access # for next progress report Tick nextProgressMessage; // access # for next progress report
unsigned percentSourceUnaligned;
unsigned percentDestUnaligned;
Tick noResponseCycles;
uint64_t numReads; uint64_t numReads;
uint64_t numWrites; uint64_t numWrites;
uint64_t maxLoads; const uint64_t maxLoads;
bool atomic; const bool atomic;
bool suppress_func_warnings;
const bool suppressFuncWarnings;
Stats::Scalar numReadsStat; Stats::Scalar numReadsStat;
Stats::Scalar numWritesStat; Stats::Scalar numWritesStat;
Stats::Scalar numCopiesStat;
// called by MemCompleteEvent::process() /**
void completeRequest(PacketPtr pkt); * Complete a request by checking the response.
*
* @param pkt Response packet
* @param functional Whether the access was functional or not
*/
void completeRequest(PacketPtr pkt, bool functional = false);
void sendPkt(PacketPtr pkt); bool sendPkt(PacketPtr pkt);
void doRetry(); void recvRetry();
friend class MemCompleteEvent;
}; };
#endif // __CPU_MEMTEST_MEMTEST_HH__ #endif // __CPU_MEMTEST_MEMTEST_HH__

View file

@ -36,8 +36,7 @@ nb_cores = 8
cpus = [ MemTest() for i in xrange(nb_cores) ] cpus = [ MemTest() for i in xrange(nb_cores) ]
# system simulated # system simulated
system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False), system = System(cpu = cpus,
funcbus = NoncoherentXBar(),
physmem = SimpleMemory(), physmem = SimpleMemory(),
membus = CoherentXBar(width=16, snoop_filter = SnoopFilter())) membus = CoherentXBar(width=16, snoop_filter = SnoopFilter()))
# Dummy voltage domain for all our clock domains # Dummy voltage domain for all our clock domains
@ -63,15 +62,11 @@ for cpu in cpus:
# All cpus are associated with cpu_clk_domain # All cpus are associated with cpu_clk_domain
cpu.clk_domain = system.cpu_clk_domain cpu.clk_domain = system.cpu_clk_domain
cpu.l1c = L1Cache(size = '32kB', assoc = 4) cpu.l1c = L1Cache(size = '32kB', assoc = 4)
cpu.l1c.cpu_side = cpu.test cpu.l1c.cpu_side = cpu.port
cpu.l1c.mem_side = system.toL2Bus.slave cpu.l1c.mem_side = system.toL2Bus.slave
system.funcbus.slave = cpu.functional
system.system_port = system.membus.slave system.system_port = system.membus.slave
# connect reference memory to funcbus
system.funcmem.port = system.funcbus.master
# connect memory to membus # connect memory to membus
system.physmem.port = system.membus.master system.physmem.port = system.membus.master
@ -82,6 +77,3 @@ system.physmem.port = system.membus.master
root = Root( full_system = False, system = system ) root = Root( full_system = False, system = system )
root.system.mem_mode = 'timing' root.system.mem_mode = 'timing'
#root.trace.flags="Cache CachePort MemoryAccess"
#root.trace.cycle=1

View file

@ -70,7 +70,7 @@ options.ports=32
nb_cores = 8 nb_cores = 8
# ruby does not support atomic, functional, or uncacheable accesses # ruby does not support atomic, functional, or uncacheable accesses
cpus = [ MemTest(atomic=False, percent_functional=50, cpus = [ MemTest(percent_functional=50,
percent_uncacheable=0, suppress_func_warnings=True) \ percent_uncacheable=0, suppress_func_warnings=True) \
for i in xrange(nb_cores) ] for i in xrange(nb_cores) ]
@ -78,9 +78,7 @@ cpus = [ MemTest(atomic=False, percent_functional=50,
options.num_cpus = nb_cores options.num_cpus = nb_cores
# system simulated # system simulated
system = System(cpu = cpus, system = System(cpu = cpus)
funcmem = SimpleMemory(in_addr_map = False),
funcbus = NoncoherentXBar())
# Dummy voltage domain for all our clock domains # Dummy voltage domain for all our clock domains
system.voltage_domain = VoltageDomain() system.voltage_domain = VoltageDomain()
system.clk_domain = SrcClockDomain(clock = '1GHz', system.clk_domain = SrcClockDomain(clock = '1GHz',
@ -107,11 +105,10 @@ assert(len(cpus) == len(system.ruby._cpu_ports))
for (i, ruby_port) in enumerate(system.ruby._cpu_ports): for (i, ruby_port) in enumerate(system.ruby._cpu_ports):
# #
# Tie the cpu test and functional ports to the ruby cpu ports and # Tie the cpu port to the ruby cpu ports and
# physmem, respectively # physmem, respectively
# #
cpus[i].test = ruby_port.slave cpus[i].port = ruby_port.slave
cpus[i].functional = system.funcbus.slave
# #
# Since the memtester is incredibly bursty, increase the deadlock # Since the memtester is incredibly bursty, increase the deadlock
@ -119,9 +116,6 @@ for (i, ruby_port) in enumerate(system.ruby._cpu_ports):
# #
ruby_port.deadlock_threshold = 1000000 ruby_port.deadlock_threshold = 1000000
# connect reference memory to funcbus
system.funcmem.port = system.funcbus.master
# ----------------------- # -----------------------
# run simulation # run simulation
# ----------------------- # -----------------------

View file

@ -36,8 +36,7 @@ nb_cores = 8
cpus = [ MemTest() for i in xrange(nb_cores) ] cpus = [ MemTest() for i in xrange(nb_cores) ]
# system simulated # system simulated
system = System(cpu = cpus, funcmem = SimpleMemory(in_addr_map = False), system = System(cpu = cpus,
funcbus = NoncoherentXBar(),
physmem = SimpleMemory(), physmem = SimpleMemory(),
membus = CoherentXBar(width=16)) membus = CoherentXBar(width=16))
# Dummy voltage domain for all our clock domains # Dummy voltage domain for all our clock domains
@ -62,15 +61,11 @@ for cpu in cpus:
# All cpus are associated with cpu_clk_domain # All cpus are associated with cpu_clk_domain
cpu.clk_domain = system.cpu_clk_domain cpu.clk_domain = system.cpu_clk_domain
cpu.l1c = L1Cache(size = '32kB', assoc = 4) cpu.l1c = L1Cache(size = '32kB', assoc = 4)
cpu.l1c.cpu_side = cpu.test cpu.l1c.cpu_side = cpu.port
cpu.l1c.mem_side = system.toL2Bus.slave cpu.l1c.mem_side = system.toL2Bus.slave
system.funcbus.slave = cpu.functional
system.system_port = system.membus.slave system.system_port = system.membus.slave
# connect reference memory to funcbus
system.funcmem.port = system.funcbus.master
# connect memory to membus # connect memory to membus
system.physmem.port = system.membus.master system.physmem.port = system.membus.master
@ -81,6 +76,4 @@ system.physmem.port = system.membus.master
root = Root( full_system = False, system = system ) root = Root( full_system = False, system = system )
root.system.mem_mode = 'timing' root.system.mem_mode = 'timing'
#root.trace.flags="Cache CachePort MemoryAccess"
#root.trace.cycle=1