From 808701a10c7d2b79feb3746c2b47c5faa3fca042 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Fri, 20 Aug 2010 11:46:12 -0700 Subject: [PATCH] memtest: Memtester support for DMA This patch adds DMA testing to the Memtester and inherits many changes from Polina's old tester_dma_extension patch. Since Ruby does not work in atomic mode, the atomic mode options are removed. --- configs/example/memtest-ruby.py | 59 ++++++++++++++++++----------- configs/ruby/MESI_CMP_directory.py | 4 ++ configs/ruby/MI_example.py | 4 ++ configs/ruby/MOESI_CMP_directory.py | 4 ++ configs/ruby/MOESI_CMP_token.py | 4 ++ configs/ruby/MOESI_hammer.py | 4 ++ src/cpu/memtest/MemTest.py | 1 + src/cpu/memtest/memtest.cc | 52 ++++++++++++++++++++----- src/cpu/memtest/memtest.hh | 7 ++++ src/mem/ruby/system/DMASequencer.cc | 6 +++ src/mem/ruby/system/SConscript | 1 + 11 files changed, 114 insertions(+), 32 deletions(-) diff --git a/configs/example/memtest-ruby.py b/configs/example/memtest-ruby.py index d67f300fc..d2e9c137e 100644 --- a/configs/example/memtest-ruby.py +++ b/configs/example/memtest-ruby.py @@ -48,25 +48,13 @@ m5_root = os.path.dirname(config_root) parser = optparse.OptionParser() -parser.add_option("-a", "--atomic", action="store_true", - help="Use atomic (non-timing) mode") -parser.add_option("-b", "--blocking", action="store_true", - help="Use blocking caches") parser.add_option("-l", "--maxloads", metavar="N", default=0, help="Stop after N loads") -parser.add_option("-f", "--functional", type="int", default=0, - metavar="PCT", - help="Target percentage of functional accesses " - "[default: %default]") -parser.add_option("-u", "--uncacheable", type="int", default=0, - metavar="PCT", - help="Target percentage of uncacheable accesses " - "[default: %default]") - parser.add_option("--progress", type="int", default=1000, metavar="NLOADS", help="Progress message interval " "[default: %default]") +parser.add_option("--num-dmas", type="int", default=0, help="# of dma testers") # # 
Add the ruby specific and protocol specific options @@ -101,36 +89,61 @@ if options.num_cpus > block_size: % (options.num_cpus, block_size) sys.exit(1) -cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, \ - percent_functional=options.functional, \ - percent_uncacheable=options.uncacheable, \ - progress_interval=options.progress) \ +# +# Currently ruby does not support atomic, functional, or uncacheable accesses +# +cpus = [ MemTest(atomic = False, \ + max_loads = options.maxloads, \ + issue_dmas = False, \ + percent_functional = 0, \ + percent_uncacheable = 0, \ + progress_interval = options.progress) \ for i in xrange(options.num_cpus) ] system = System(cpu = cpus, funcmem = PhysicalMemory(), physmem = PhysicalMemory()) -system.ruby = Ruby.create_system(options, system) +system.dmas = [ MemTest(atomic = False, \ + max_loads = options.maxloads, \ + issue_dmas = True, \ + percent_functional = 0, \ + percent_uncacheable = 0, \ + progress_interval = options.progress) \ + for i in xrange(options.num_dmas) ] +system.ruby = Ruby.create_system(options, \ + system.physmem, \ + dma_devices = system.dmas) + +# +# The tester is most effective when randomization is turned on and +# artificial delay is randomly inserted on messages +# +system.ruby.randomization = True + assert(len(cpus) == len(system.ruby.cpu_ruby_ports)) for (i, cpu) in enumerate(cpus): # - # Tie the memtester ports to the correct system ports + # Tie the cpu memtester ports to the correct system ports # cpu.test = system.ruby.cpu_ruby_ports[i].port cpu.functional = system.funcmem.port +for (i, dma) in enumerate(system.dmas): + # + # Tie the dma memtester ports to the correct functional port + # Note that the test port has already been connected to the dma_sequencer + # + dma.functional = system.funcmem.port + # ----------------------- # run simulation # ----------------------- root = Root( system = system ) -if options.atomic: - root.system.mem_mode = 'atomic' -else: - root.system.mem_mode = 
'timing' +root.system.mem_mode = 'timing' # Not much point in this being higher than the L1 latency m5.ticks.setGlobalFrequency('1ns') diff --git a/configs/ruby/MESI_CMP_directory.py b/configs/ruby/MESI_CMP_directory.py index d336ca7e4..f6dafa44a 100644 --- a/configs/ruby/MESI_CMP_directory.py +++ b/configs/ruby/MESI_CMP_directory.py @@ -153,6 +153,10 @@ def create_system(options, system, piobus, dma_devices): dma_cntrl.dma_sequencer.port = dma_device.dma exec("system.dma_cntrl%d = dma_cntrl" % i) + if dma_device.type == 'MemTest': + system.dma_cntrl.dma_sequencer.port = dma_device.test + else: + system.dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl_nodes.append(dma_cntrl) all_cntrls = l1_cntrl_nodes + \ diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index c51e19e09..0e101d18f 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -132,6 +132,10 @@ def create_system(options, system, piobus, dma_devices): dma_sequencer = dma_seq) exec("system.dma_cntrl%d = dma_cntrl" % i) + if dma_device.type == 'MemTest': + system.dma_cntrl.dma_sequencer.port = dma_device.test + else: + system.dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl_nodes.append(dma_cntrl) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index ebc628fc0..4498617a9 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -151,6 +151,10 @@ def create_system(options, system, piobus, dma_devices): dma_sequencer = dma_seq) exec("system.dma_cntrl%d = dma_cntrl" % i) + if dma_device.type == 'MemTest': + system.dma_cntrl.dma_sequencer.port = dma_device.test + else: + system.dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl_nodes.append(dma_cntrl) diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index db704cfd8..f2669ef78 100644 --- 
a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -173,6 +173,10 @@ def create_system(options, system, piobus, dma_devices): dma_sequencer = dma_seq) exec("system.dma_cntrl%d = dma_cntrl" % i) + if dma_device.type == 'MemTest': + system.dma_cntrl.dma_sequencer.port = dma_device.test + else: + system.dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl_nodes.append(dma_cntrl) diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 45b08a288..5cf27206b 100644 --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -139,6 +139,10 @@ def create_system(options, system, piobus, dma_devices): dma_sequencer = dma_seq) exec("system.dma_cntrl%d = dma_cntrl" % i) + if dma_device.type == 'MemTest': + system.dma_cntrl.dma_sequencer.port = dma_device.test + else: + system.dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl.dma_sequencer.port = dma_device.dma dma_cntrl_nodes.append(dma_cntrl) diff --git a/src/cpu/memtest/MemTest.py b/src/cpu/memtest/MemTest.py index 8e1b3a8d0..957de8088 100644 --- a/src/cpu/memtest/MemTest.py +++ b/src/cpu/memtest/MemTest.py @@ -38,6 +38,7 @@ class MemTest(MemObject): percent_dest_unaligned = Param.Percent(50, "percent of copy dest address that are unaligned") percent_reads = Param.Percent(65, "target read percentage") + issue_dmas = Param.Bool(False, "this memtester should issue dma requests") percent_source_unaligned = Param.Percent(50, "percent of copy source address that are unaligned") percent_functional = Param.Percent(50, "percent of access that are functional") diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index fccb8435f..7b3ed3166 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -109,8 +109,20 @@ MemTest::sendPkt(PacketPtr pkt) { completeRequest(pkt); } else if (!cachePort.sendTiming(pkt)) { + DPRINTF(MemTest, "accessRetry setting to true\n"); + + // + // dma requests 
should never be retried + // + if (issueDmas) { + panic("Nacked DMA requests are not supported\n"); + } accessRetry = true; retryPkt = pkt; + } else { + if (issueDmas) { + dmaOutstanding = true; + } } } @@ -127,6 +139,7 @@ MemTest::MemTest(const Params *p) percentReads(p->percent_reads), percentFunctional(p->percent_functional), percentUncacheable(p->percent_uncacheable), + issueDmas(p->issue_dmas), progressInterval(p->progress_interval), nextProgressMessage(p->progress_interval), percentSourceUnaligned(p->percent_source_unaligned), @@ -134,6 +147,7 @@ MemTest::MemTest(const Params *p) maxLoads(p->max_loads), atomic(p->atomic) { + vector cmd; cmd.push_back("/bin/ls"); vector null_vec; @@ -143,6 +157,8 @@ MemTest::MemTest(const Params *p) cachePort.snoopRangeSent = false; funcPort.snoopRangeSent = true; + id = TESTER_ALLOCATOR++; + // Needs to be masked off once we know the block size. traceBlockAddr = p->trace_addr; baseAddr1 = 0x100000; @@ -154,9 +170,8 @@ MemTest::MemTest(const Params *p) numReads = 0; schedule(tickEvent, 0); - id = TESTER_ALLOCATOR++; - accessRetry = false; + dmaOutstanding = false; } Port * @@ -188,6 +203,10 @@ MemTest::completeRequest(PacketPtr pkt) { Request *req = pkt->req; + if (issueDmas) { + dmaOutstanding = false; + } + DPRINTF(MemTest, "completing %s at address %x (blk %x)\n", pkt->isWrite() ? 
"write" : "read", req->getPaddr(), blockAddr(req->getPaddr())); @@ -265,11 +284,15 @@ MemTest::tick() schedule(tickEvent, curTick + ticks(1)); if (++noResponseCycles >= 500000) { + if (issueDmas) { + cerr << "DMA tester "; + } cerr << name() << ": deadlocked at cycle " << curTick << endl; fatal(""); } - if (accessRetry) { + if (accessRetry || (issueDmas && dmaOutstanding)) { + DPRINTF(MemTest, "MemTester waiting on accessRetry or DMA response\n"); return; } @@ -281,6 +304,8 @@ MemTest::tick() unsigned access_size = random() % 4; bool uncacheable = (random() % 100) < percentUncacheable; + unsigned dma_access_size = random() % 4; + //If we aren't doing copies, use id as offset, and do a false sharing //mem tester //We can eliminate the lower bits of the offset, and then use the id @@ -288,6 +313,7 @@ MemTest::tick() offset = blockAddr(offset); offset += id; access_size = 0; + dma_access_size = 0; Request *req = new Request(); Request::Flags flags; @@ -296,14 +322,21 @@ MemTest::tick() if (uncacheable) { flags.set(Request::UNCACHEABLE); paddr = uncacheAddr + offset; - } else { + } else { paddr = ((base) ? 
baseAddr1 : baseAddr2) + offset; } bool probe = (random() % 100 < percentFunctional) && !uncacheable; - paddr &= ~((1 << access_size) - 1); - req->setPhys(paddr, 1 << access_size, flags); - req->setThreadContext(id,0); + if (issueDmas) { + paddr &= ~((1 << dma_access_size) - 1); + req->setPhys(paddr, 1 << dma_access_size, flags); + req->setThreadContext(id,0); + } else { + paddr &= ~((1 << access_size) - 1); + req->setPhys(paddr, 1 << access_size, flags); + req->setThreadContext(id,0); + } + assert(req->getSize() == 1); uint8_t *result = new uint8_t[8]; @@ -325,8 +358,8 @@ MemTest::tick() funcPort.readBlob(req->getPaddr(), result, req->getSize()); DPRINTF(MemTest, - "initiating read at address %x (blk %x) expecting %x\n", - req->getPaddr(), blockAddr(req->getPaddr()), *result); + "id %d initiating read at address %x (blk %x) expecting %x\n", + id, req->getPaddr(), blockAddr(req->getPaddr()), *result); PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast); pkt->setSrc(0); @@ -380,6 +413,7 @@ void MemTest::doRetry() { if (cachePort.sendTiming(retryPkt)) { + DPRINTF(MemTest, "accessRetry setting to false\n"); accessRetry = false; retryPkt = NULL; } diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 907659f69..bb71da355 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -132,6 +132,11 @@ class MemTest : public MemObject PacketPtr retryPkt; bool accessRetry; + + // + // The dmaOutstanding flag enforces only one dma at a time + // + bool dmaOutstanding; unsigned size; // size of testing memory region @@ -139,6 +144,8 @@ class MemTest : public MemObject unsigned percentFunctional; // target percentage of functional accesses unsigned percentUncacheable; + bool issueDmas; + int id; std::set outstandingAddrs; diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index a7f3a8aec..4d10a1e2f 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ 
-49,6 +49,10 @@ DMASequencer::init() RequestStatus DMASequencer::makeRequest(const RubyRequest &request) { + if (m_is_busy) { + return RequestStatus_BufferFull; + } + uint64_t paddr = request.paddr; uint8_t* data = request.data; int len = request.len; @@ -108,6 +112,7 @@ DMASequencer::issueNext() assert(m_is_busy == true); active_request.bytes_completed = active_request.bytes_issued; if (active_request.len == active_request.bytes_completed) { + DPRINTF(RubyDma, "DMA request completed\n"); ruby_hit_callback(active_request.pkt); m_is_busy = false; return; @@ -141,6 +146,7 @@ DMASequencer::issueNext() assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg); active_request.bytes_issued += msg->getLen(); + DPRINTF(RubyDma, "Next DMA segment issued to the DMA cntrl\n"); } void diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript index 2d14229d7..6d1aff31d 100644 --- a/src/mem/ruby/system/SConscript +++ b/src/mem/ruby/system/SConscript @@ -52,3 +52,4 @@ Source('System.cc') Source('TimerTable.cc') TraceFlag('RubyCache') +TraceFlag('RubyDma')