From bddb7ad7b530a10594a9ad6d07648fb17fea607a Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 18 Nov 2004 06:11:01 -0500 Subject: [PATCH] Put back in SimpleCPU changes and Coherence Timing Bus changes Small fixes to read() in simpleCPU and small fixes to cache_impl.hh and to simple_mem_bank to deal with writeInv from DMA --HG-- extra : convert_revision : db24028c34b7a535aa0db55b43bad1d3d75cd258 --- cpu/memtest/memtest.cc | 49 +++++++++++++++++++++++++++++-- cpu/memtest/memtest.hh | 15 ++++++---- cpu/simple_cpu/simple_cpu.cc | 56 +++++++++++++++++++++++++----------- cpu/simple_cpu/simple_cpu.hh | 2 ++ 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/cpu/memtest/memtest.cc b/cpu/memtest/memtest.cc index 6584a62ba..e967c79da 100644 --- a/cpu/memtest/memtest.cc +++ b/cpu/memtest/memtest.cc @@ -28,9 +28,10 @@ // FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded -#include -#include #include +#include +#include +#include #include #include "base/misc.hh" @@ -44,6 +45,8 @@ using namespace std; +int TESTER_ALLOCATOR=0; + MemTest::MemTest(const string &name, MemInterface *_cache_interface, FunctionalMemory *main_mem, @@ -111,6 +114,8 @@ MemTest::MemTest(const string &name, noResponseCycles = 0; numReads = 0; tickEvent.schedule(0); + + id = TESTER_ALLOCATOR++; } static void @@ -127,6 +132,11 @@ printData(ostream &os, uint8_t *data, int nbytes) void MemTest::completeRequest(MemReqPtr &req, uint8_t *data) { + //Remove the address from the list of outstanding + std::set::iterator removeAddr = outstandingAddrs.find(req->paddr); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + switch (req->cmd) { case Read: if (memcmp(req->data, data, req->size) != 0) { @@ -158,6 +168,10 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) break; case Copy: + //Also remove dest from outstanding list + removeAddr = outstandingAddrs.find(req->dest); + assert(removeAddr != outstandingAddrs.end()); + 
outstandingAddrs.erase(removeAddr); numCopiesStat++; break; @@ -212,7 +226,7 @@ MemTest::tick() if (!tickEvent.scheduled()) tickEvent.schedule(curTick + 1); - if (++noResponseCycles >= 5000) { + if (++noResponseCycles >= 500000) { cerr << name() << ": deadlocked at cycle " << curTick << endl; fatal(""); } @@ -232,6 +246,16 @@ MemTest::tick() unsigned source_align = rand() % 100; unsigned dest_align = rand() % 100; + //If we aren't doing copies, use id as offset, and do a false sharing + //mem tester + if (percentCopies == 0) { + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset1 &= ~63; //Not the low order bits + offset1 += id; + access_size = 0; + } + MemReqPtr req = new MemReq(); if (cacheable < percentUncacheable) { @@ -251,6 +275,13 @@ MemTest::tick() if (cmd < percentReads) { // read + + //For now we only allow one outstanding request per address per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + req->cmd = Read; uint8_t *result = new uint8_t[8]; checkMem->access(Read, req->paddr, result, req->size); @@ -273,6 +304,13 @@ MemTest::tick() } } else if (cmd < (100 - percentCopies)){ // write + + //For now we only allow one outstanding request per address per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + req->cmd = Write; memcpy(req->data, &data, req->size); checkMem->access(Write, req->paddr, req->data, req->size); @@ -298,6 +336,11 @@ MemTest::tick() // copy Addr source = ((base) ? baseAddr1 : baseAddr2) + offset1; Addr dest = ((base) ? 
baseAddr2 : baseAddr1) + offset2; + if (outstandingAddrs.find(source) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(source); + if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(dest); + if (source_align >= percentSourceUnaligned) { source = blockAddr(source); } diff --git a/cpu/memtest/memtest.hh b/cpu/memtest/memtest.hh index 72e0709d9..43b17a713 100644 --- a/cpu/memtest/memtest.hh +++ b/cpu/memtest/memtest.hh @@ -29,13 +29,14 @@ #ifndef __MEMTEST_HH__ #define __MEMTEST_HH__ -#include "sim/sim_object.hh" -#include "mem/mem_interface.hh" -#include "mem/functional_mem/functional_memory.hh" -#include "cpu/base_cpu.hh" -#include "cpu/exec_context.hh" +#include #include "base/statistics.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "mem/functional_mem/functional_memory.hh" +#include "mem/mem_interface.hh" +#include "sim/sim_object.hh" #include "sim/stats.hh" class MemTest : public BaseCPU @@ -87,6 +88,10 @@ class MemTest : public BaseCPU unsigned percentCopies; // target percentage of copy accesses unsigned percentUncacheable; + int id; + + std::set outstandingAddrs; + unsigned blockSize; Addr blockAddrMask; diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc index d48f93663..9f4f821d4 100644 --- a/cpu/simple_cpu/simple_cpu.cc +++ b/cpu/simple_cpu/simple_cpu.cc @@ -414,21 +414,22 @@ template Fault SimpleCPU::read(Addr addr, T &data, unsigned flags) { + if (status() == DcacheMissStall) { + Fault fault = xc->read(memReq,data); + + if (traceData) { + traceData->setAddr(addr); + if (fault == No_Fault) + traceData->setData(data); + } + return fault; + } + memReq->reset(addr, sizeof(T), flags); // translate to physical address Fault fault = xc->translateDataReadReq(memReq); - // do functional access - if (fault == No_Fault) - fault = xc->read(memReq, data); - - if (traceData) { - traceData->setAddr(addr); - if (fault == No_Fault) - traceData->setData(data); - 
} - // if we have a cache, do cache access too if (fault == No_Fault && dcacheInterface) { memReq->cmd = Read; @@ -444,6 +445,24 @@ SimpleCPU::read(Addr addr, T &data, unsigned flags) lastDcacheStall = curTick; unscheduleTickEvent(); _status = DcacheMissStall; + } else { + // do functional access + fault = xc->read(memReq, data); + + if (traceData) { + traceData->setAddr(addr); + if (fault == No_Fault) + traceData->setData(data); + } + } + } else if(fault == No_Fault) { + // do functional access + fault = xc->read(memReq, data); + + if (traceData) { + traceData->setAddr(addr); + if (fault == No_Fault) + traceData->setData(data); } } @@ -605,6 +624,9 @@ SimpleCPU::processCacheCompletion() scheduleTickEvent(1); break; case DcacheMissStall: + if (memReq->cmd.isRead()) { + curStaticInst->execute(this,traceData); + } dcacheStallCycles += curTick - lastDcacheStall; _status = Running; scheduleTickEvent(1); @@ -750,10 +772,10 @@ SimpleCPU::tick() comInstEventQueue[0]->serviceEvents(numInst); // decode the instruction - inst = htoa(inst); - StaticInstPtr si(inst); + inst = htoa(inst); + curStaticInst = StaticInst::decode(inst); - traceData = Trace::getInstRecord(curTick, xc, this, si, + traceData = Trace::getInstRecord(curTick, xc, this, curStaticInst, xc->regs.pc); #ifdef FULL_SYSTEM @@ -762,18 +784,18 @@ SimpleCPU::tick() xc->func_exe_inst++; - fault = si->execute(this, traceData); + fault = curStaticInst->execute(this, traceData); #ifdef FULL_SYSTEM if (xc->fnbin) - xc->execute(si.get()); + xc->execute(curStaticInst.get()); #endif - if (si->isMemRef()) { + if (curStaticInst->isMemRef()) { numMemRefs++; } - if (si->isLoad()) { + if (curStaticInst->isLoad()) { ++numLoad; comLoadEventQueue[0]->serviceEvents(numLoad); } diff --git a/cpu/simple_cpu/simple_cpu.hh b/cpu/simple_cpu/simple_cpu.hh index 341a0da23..8104d73a4 100644 --- a/cpu/simple_cpu/simple_cpu.hh +++ b/cpu/simple_cpu/simple_cpu.hh @@ -184,6 +184,8 @@ class SimpleCPU : public BaseCPU // Refcounted pointer to the 
one memory request. MemReqPtr memReq; + StaticInstPtr curStaticInst; + class CacheCompletionEvent : public Event { private: