memtest: Memtester support for DMA

This patch adds DMA testing to the Memtester and inherits many changes from
Polina's old tester_dma_extension patch.  Since Ruby does not work in atomic
mode, the atomic mode options are removed.
This commit is contained in:
Brad Beckmann 2010-08-20 11:46:12 -07:00
parent 64b2205992
commit 808701a10c
11 changed files with 114 additions and 32 deletions

View file

@ -48,25 +48,13 @@ m5_root = os.path.dirname(config_root)
parser = optparse.OptionParser() parser = optparse.OptionParser()
parser.add_option("-a", "--atomic", action="store_true",
help="Use atomic (non-timing) mode")
parser.add_option("-b", "--blocking", action="store_true",
help="Use blocking caches")
parser.add_option("-l", "--maxloads", metavar="N", default=0, parser.add_option("-l", "--maxloads", metavar="N", default=0,
help="Stop after N loads") help="Stop after N loads")
parser.add_option("-f", "--functional", type="int", default=0,
metavar="PCT",
help="Target percentage of functional accesses "
"[default: %default]")
parser.add_option("-u", "--uncacheable", type="int", default=0,
metavar="PCT",
help="Target percentage of uncacheable accesses "
"[default: %default]")
parser.add_option("--progress", type="int", default=1000, parser.add_option("--progress", type="int", default=1000,
metavar="NLOADS", metavar="NLOADS",
help="Progress message interval " help="Progress message interval "
"[default: %default]") "[default: %default]")
parser.add_option("--num-dmas", type="int", default=0, help="# of dma testers")
# #
# Add the ruby specific and protocol specific options # Add the ruby specific and protocol specific options
@ -101,36 +89,61 @@ if options.num_cpus > block_size:
% (options.num_cpus, block_size) % (options.num_cpus, block_size)
sys.exit(1) sys.exit(1)
cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, \ #
percent_functional=options.functional, \ # Currently ruby does not support atomic, functional, or uncacheable accesses
percent_uncacheable=options.uncacheable, \ #
progress_interval=options.progress) \ cpus = [ MemTest(atomic = False, \
max_loads = options.maxloads, \
issue_dmas = False, \
percent_functional = 0, \
percent_uncacheable = 0, \
progress_interval = options.progress) \
for i in xrange(options.num_cpus) ] for i in xrange(options.num_cpus) ]
system = System(cpu = cpus, system = System(cpu = cpus,
funcmem = PhysicalMemory(), funcmem = PhysicalMemory(),
physmem = PhysicalMemory()) physmem = PhysicalMemory())
system.ruby = Ruby.create_system(options, system) system.dmas = [ MemTest(atomic = False, \
max_loads = options.maxloads, \
issue_dmas = True, \
percent_functional = 0, \
percent_uncacheable = 0, \
progress_interval = options.progress) \
for i in xrange(options.num_dmas) ]
system.ruby = Ruby.create_system(options, \
system.physmem, \
dma_devices = system.dmas)
#
# The tester is most effective when randomization is turned on and
# artificial delay is randomly inserted on messages
#
system.ruby.randomization = True
assert(len(cpus) == len(system.ruby.cpu_ruby_ports)) assert(len(cpus) == len(system.ruby.cpu_ruby_ports))
for (i, cpu) in enumerate(cpus): for (i, cpu) in enumerate(cpus):
# #
# Tie the memtester ports to the correct system ports # Tie the cpu memtester ports to the correct system ports
# #
cpu.test = system.ruby.cpu_ruby_ports[i].port cpu.test = system.ruby.cpu_ruby_ports[i].port
cpu.functional = system.funcmem.port cpu.functional = system.funcmem.port
for (i, dma) in enumerate(system.dmas):
#
# Tie the dma memtester ports to the correct functional port
# Note that the test port has already been connected to the dma_sequencer
#
dma.functional = system.funcmem.port
# ----------------------- # -----------------------
# run simulation # run simulation
# ----------------------- # -----------------------
root = Root( system = system ) root = Root( system = system )
if options.atomic: root.system.mem_mode = 'timing'
root.system.mem_mode = 'atomic'
else:
root.system.mem_mode = 'timing'
# Not much point in this being higher than the L1 latency # Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns') m5.ticks.setGlobalFrequency('1ns')

View file

@ -153,6 +153,10 @@ def create_system(options, system, piobus, dma_devices):
dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma
exec("system.dma_cntrl%d = dma_cntrl" % i) exec("system.dma_cntrl%d = dma_cntrl" % i)
if dma_device.type == 'MemTest':
system.dma_cntrl.dma_sequencer.port = dma_device.test
else:
system.dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl_nodes.append(dma_cntrl) dma_cntrl_nodes.append(dma_cntrl)
all_cntrls = l1_cntrl_nodes + \ all_cntrls = l1_cntrl_nodes + \

View file

@ -132,6 +132,10 @@ def create_system(options, system, piobus, dma_devices):
dma_sequencer = dma_seq) dma_sequencer = dma_seq)
exec("system.dma_cntrl%d = dma_cntrl" % i) exec("system.dma_cntrl%d = dma_cntrl" % i)
if dma_device.type == 'MemTest':
system.dma_cntrl.dma_sequencer.port = dma_device.test
else:
system.dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl_nodes.append(dma_cntrl) dma_cntrl_nodes.append(dma_cntrl)

View file

@ -151,6 +151,10 @@ def create_system(options, system, piobus, dma_devices):
dma_sequencer = dma_seq) dma_sequencer = dma_seq)
exec("system.dma_cntrl%d = dma_cntrl" % i) exec("system.dma_cntrl%d = dma_cntrl" % i)
if dma_device.type == 'MemTest':
system.dma_cntrl.dma_sequencer.port = dma_device.test
else:
system.dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl_nodes.append(dma_cntrl) dma_cntrl_nodes.append(dma_cntrl)

View file

@ -173,6 +173,10 @@ def create_system(options, system, piobus, dma_devices):
dma_sequencer = dma_seq) dma_sequencer = dma_seq)
exec("system.dma_cntrl%d = dma_cntrl" % i) exec("system.dma_cntrl%d = dma_cntrl" % i)
if dma_device.type == 'MemTest':
system.dma_cntrl.dma_sequencer.port = dma_device.test
else:
system.dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl_nodes.append(dma_cntrl) dma_cntrl_nodes.append(dma_cntrl)

View file

@ -139,6 +139,10 @@ def create_system(options, system, piobus, dma_devices):
dma_sequencer = dma_seq) dma_sequencer = dma_seq)
exec("system.dma_cntrl%d = dma_cntrl" % i) exec("system.dma_cntrl%d = dma_cntrl" % i)
if dma_device.type == 'MemTest':
system.dma_cntrl.dma_sequencer.port = dma_device.test
else:
system.dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma
dma_cntrl_nodes.append(dma_cntrl) dma_cntrl_nodes.append(dma_cntrl)

View file

@ -38,6 +38,7 @@ class MemTest(MemObject):
percent_dest_unaligned = Param.Percent(50, percent_dest_unaligned = Param.Percent(50,
"percent of copy dest address that are unaligned") "percent of copy dest address that are unaligned")
percent_reads = Param.Percent(65, "target read percentage") percent_reads = Param.Percent(65, "target read percentage")
issue_dmas = Param.Bool(False, "this memtester should issue dma requests")
percent_source_unaligned = Param.Percent(50, percent_source_unaligned = Param.Percent(50,
"percent of copy source address that are unaligned") "percent of copy source address that are unaligned")
percent_functional = Param.Percent(50, "percent of access that are functional") percent_functional = Param.Percent(50, "percent of access that are functional")

View file

@ -109,8 +109,20 @@ MemTest::sendPkt(PacketPtr pkt) {
completeRequest(pkt); completeRequest(pkt);
} }
else if (!cachePort.sendTiming(pkt)) { else if (!cachePort.sendTiming(pkt)) {
DPRINTF(MemTest, "accessRetry setting to true\n");
//
// dma requests should never be retried
//
if (issueDmas) {
panic("Nacked DMA requests are not supported\n");
}
accessRetry = true; accessRetry = true;
retryPkt = pkt; retryPkt = pkt;
} else {
if (issueDmas) {
dmaOutstanding = true;
}
} }
} }
@ -127,6 +139,7 @@ MemTest::MemTest(const Params *p)
percentReads(p->percent_reads), percentReads(p->percent_reads),
percentFunctional(p->percent_functional), percentFunctional(p->percent_functional),
percentUncacheable(p->percent_uncacheable), percentUncacheable(p->percent_uncacheable),
issueDmas(p->issue_dmas),
progressInterval(p->progress_interval), progressInterval(p->progress_interval),
nextProgressMessage(p->progress_interval), nextProgressMessage(p->progress_interval),
percentSourceUnaligned(p->percent_source_unaligned), percentSourceUnaligned(p->percent_source_unaligned),
@ -134,6 +147,7 @@ MemTest::MemTest(const Params *p)
maxLoads(p->max_loads), maxLoads(p->max_loads),
atomic(p->atomic) atomic(p->atomic)
{ {
vector<string> cmd; vector<string> cmd;
cmd.push_back("/bin/ls"); cmd.push_back("/bin/ls");
vector<string> null_vec; vector<string> null_vec;
@ -143,6 +157,8 @@ MemTest::MemTest(const Params *p)
cachePort.snoopRangeSent = false; cachePort.snoopRangeSent = false;
funcPort.snoopRangeSent = true; funcPort.snoopRangeSent = true;
id = TESTER_ALLOCATOR++;
// Needs to be masked off once we know the block size. // Needs to be masked off once we know the block size.
traceBlockAddr = p->trace_addr; traceBlockAddr = p->trace_addr;
baseAddr1 = 0x100000; baseAddr1 = 0x100000;
@ -154,9 +170,8 @@ MemTest::MemTest(const Params *p)
numReads = 0; numReads = 0;
schedule(tickEvent, 0); schedule(tickEvent, 0);
id = TESTER_ALLOCATOR++;
accessRetry = false; accessRetry = false;
dmaOutstanding = false;
} }
Port * Port *
@ -188,6 +203,10 @@ MemTest::completeRequest(PacketPtr pkt)
{ {
Request *req = pkt->req; Request *req = pkt->req;
if (issueDmas) {
dmaOutstanding = false;
}
DPRINTF(MemTest, "completing %s at address %x (blk %x)\n", DPRINTF(MemTest, "completing %s at address %x (blk %x)\n",
pkt->isWrite() ? "write" : "read", pkt->isWrite() ? "write" : "read",
req->getPaddr(), blockAddr(req->getPaddr())); req->getPaddr(), blockAddr(req->getPaddr()));
@ -265,11 +284,15 @@ MemTest::tick()
schedule(tickEvent, curTick + ticks(1)); schedule(tickEvent, curTick + ticks(1));
if (++noResponseCycles >= 500000) { if (++noResponseCycles >= 500000) {
if (issueDmas) {
cerr << "DMA tester ";
}
cerr << name() << ": deadlocked at cycle " << curTick << endl; cerr << name() << ": deadlocked at cycle " << curTick << endl;
fatal(""); fatal("");
} }
if (accessRetry) { if (accessRetry || (issueDmas && dmaOutstanding)) {
DPRINTF(MemTest, "MemTester waiting on accessRetry or DMA response\n");
return; return;
} }
@ -281,6 +304,8 @@ MemTest::tick()
unsigned access_size = random() % 4; unsigned access_size = random() % 4;
bool uncacheable = (random() % 100) < percentUncacheable; bool uncacheable = (random() % 100) < percentUncacheable;
unsigned dma_access_size = random() % 4;
//If we aren't doing copies, use id as offset, and do a false sharing //If we aren't doing copies, use id as offset, and do a false sharing
//mem tester //mem tester
//We can eliminate the lower bits of the offset, and then use the id //We can eliminate the lower bits of the offset, and then use the id
@ -288,6 +313,7 @@ MemTest::tick()
offset = blockAddr(offset); offset = blockAddr(offset);
offset += id; offset += id;
access_size = 0; access_size = 0;
dma_access_size = 0;
Request *req = new Request(); Request *req = new Request();
Request::Flags flags; Request::Flags flags;
@ -296,14 +322,21 @@ MemTest::tick()
if (uncacheable) { if (uncacheable) {
flags.set(Request::UNCACHEABLE); flags.set(Request::UNCACHEABLE);
paddr = uncacheAddr + offset; paddr = uncacheAddr + offset;
} else { } else {
paddr = ((base) ? baseAddr1 : baseAddr2) + offset; paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
} }
bool probe = (random() % 100 < percentFunctional) && !uncacheable; bool probe = (random() % 100 < percentFunctional) && !uncacheable;
paddr &= ~((1 << access_size) - 1); if (issueDmas) {
req->setPhys(paddr, 1 << access_size, flags); paddr &= ~((1 << dma_access_size) - 1);
req->setThreadContext(id,0); req->setPhys(paddr, 1 << dma_access_size, flags);
req->setThreadContext(id,0);
} else {
paddr &= ~((1 << access_size) - 1);
req->setPhys(paddr, 1 << access_size, flags);
req->setThreadContext(id,0);
}
assert(req->getSize() == 1);
uint8_t *result = new uint8_t[8]; uint8_t *result = new uint8_t[8];
@ -325,8 +358,8 @@ MemTest::tick()
funcPort.readBlob(req->getPaddr(), result, req->getSize()); funcPort.readBlob(req->getPaddr(), result, req->getSize());
DPRINTF(MemTest, DPRINTF(MemTest,
"initiating read at address %x (blk %x) expecting %x\n", "id %d initiating read at address %x (blk %x) expecting %x\n",
req->getPaddr(), blockAddr(req->getPaddr()), *result); id, req->getPaddr(), blockAddr(req->getPaddr()), *result);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast); PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
pkt->setSrc(0); pkt->setSrc(0);
@ -380,6 +413,7 @@ void
MemTest::doRetry() MemTest::doRetry()
{ {
if (cachePort.sendTiming(retryPkt)) { if (cachePort.sendTiming(retryPkt)) {
DPRINTF(MemTest, "accessRetry setting to false\n");
accessRetry = false; accessRetry = false;
retryPkt = NULL; retryPkt = NULL;
} }

View file

@ -132,6 +132,11 @@ class MemTest : public MemObject
PacketPtr retryPkt; PacketPtr retryPkt;
bool accessRetry; bool accessRetry;
//
// The dmaOutstanding flag enforces only one dma at a time
//
bool dmaOutstanding;
unsigned size; // size of testing memory region unsigned size; // size of testing memory region
@ -139,6 +144,8 @@ class MemTest : public MemObject
unsigned percentFunctional; // target percentage of functional accesses unsigned percentFunctional; // target percentage of functional accesses
unsigned percentUncacheable; unsigned percentUncacheable;
bool issueDmas;
int id; int id;
std::set<unsigned> outstandingAddrs; std::set<unsigned> outstandingAddrs;

View file

@ -49,6 +49,10 @@ DMASequencer::init()
RequestStatus RequestStatus
DMASequencer::makeRequest(const RubyRequest &request) DMASequencer::makeRequest(const RubyRequest &request)
{ {
if (m_is_busy) {
return RequestStatus_BufferFull;
}
uint64_t paddr = request.paddr; uint64_t paddr = request.paddr;
uint8_t* data = request.data; uint8_t* data = request.data;
int len = request.len; int len = request.len;
@ -108,6 +112,7 @@ DMASequencer::issueNext()
assert(m_is_busy == true); assert(m_is_busy == true);
active_request.bytes_completed = active_request.bytes_issued; active_request.bytes_completed = active_request.bytes_issued;
if (active_request.len == active_request.bytes_completed) { if (active_request.len == active_request.bytes_completed) {
DPRINTF(RubyDma, "DMA request completed\n");
ruby_hit_callback(active_request.pkt); ruby_hit_callback(active_request.pkt);
m_is_busy = false; m_is_busy = false;
return; return;
@ -141,6 +146,7 @@ DMASequencer::issueNext()
assert(m_mandatory_q_ptr != NULL); assert(m_mandatory_q_ptr != NULL);
m_mandatory_q_ptr->enqueue(msg); m_mandatory_q_ptr->enqueue(msg);
active_request.bytes_issued += msg->getLen(); active_request.bytes_issued += msg->getLen();
DPRINTF(RubyDma, "Next DMA segment issued to the DMA cntrl\n");
} }
void void

View file

@ -52,3 +52,4 @@ Source('System.cc')
Source('TimerTable.cc') Source('TimerTable.cc')
TraceFlag('RubyCache') TraceFlag('RubyCache')
TraceFlag('RubyDma')