cpu: Add per-thread monitors

Adds per-thread address monitors to support FullSystem SMT.
This commit is contained in:
Mitch Hayenga 2015-09-30 11:14:19 -05:00
parent 582a0148b4
commit fafa83ed32
14 changed files with 128 additions and 64 deletions

View file

@ -133,7 +133,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
numThreads(p->numThreads), system(p->system), numThreads(p->numThreads), system(p->system),
functionTraceStream(nullptr), currentFunctionStart(0), functionTraceStream(nullptr), currentFunctionStart(0),
currentFunctionEnd(0), functionEntryTick(0), currentFunctionEnd(0), functionEntryTick(0),
addressMonitor() addressMonitor(p->numThreads)
{ {
// if Python did not provide a valid ID, do it here // if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) { if (_cpuId == -1 ) {
@ -271,39 +271,48 @@ BaseCPU::~BaseCPU()
} }
void void
BaseCPU::armMonitor(Addr address) BaseCPU::armMonitor(ThreadID tid, Addr address)
{ {
addressMonitor.armed = true; assert(tid < numThreads);
addressMonitor.vAddr = address; AddressMonitor &monitor = addressMonitor[tid];
addressMonitor.pAddr = 0x0;
DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address); monitor.armed = true;
monitor.vAddr = address;
monitor.pAddr = 0x0;
DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
} }
bool bool
BaseCPU::mwait(PacketPtr pkt) BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
{ {
if(addressMonitor.gotWakeup == false) { assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
if(monitor.gotWakeup == false) {
int block_size = cacheLineSize(); int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1)); uint64_t mask = ~((uint64_t)(block_size - 1));
assert(pkt->req->hasPaddr()); assert(pkt->req->hasPaddr());
addressMonitor.pAddr = pkt->getAddr() & mask; monitor.pAddr = pkt->getAddr() & mask;
addressMonitor.waiting = true; monitor.waiting = true;
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n", DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
addressMonitor.vAddr, addressMonitor.pAddr); "line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
return true; return true;
} else { } else {
addressMonitor.gotWakeup = false; monitor.gotWakeup = false;
return false; return false;
} }
} }
void void
BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb) BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
{ {
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
Request req; Request req;
Addr addr = addressMonitor.vAddr; Addr addr = monitor.vAddr;
int block_size = cacheLineSize(); int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1)); uint64_t mask = ~((uint64_t)(block_size - 1));
int size = block_size; int size = block_size;
@ -320,11 +329,11 @@ BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read); Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
assert(fault == NoFault); assert(fault == NoFault);
addressMonitor.pAddr = req.getPaddr() & mask; monitor.pAddr = req.getPaddr() & mask;
addressMonitor.waiting = true; monitor.waiting = true;
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n", DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
addressMonitor.vAddr, addressMonitor.pAddr); tid, monitor.vAddr, monitor.pAddr);
} }
void void

View file

@ -559,14 +559,17 @@ class BaseCPU : public MemObject
Stats::Scalar numWorkItemsCompleted; Stats::Scalar numWorkItemsCompleted;
private: private:
AddressMonitor addressMonitor; std::vector<AddressMonitor> addressMonitor;
public: public:
void armMonitor(Addr address); void armMonitor(ThreadID tid, Addr address);
bool mwait(PacketPtr pkt); bool mwait(ThreadID tid, PacketPtr pkt);
void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb); void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; } AddressMonitor *getCpuAddrMonitor(ThreadID tid)
void atomicNotify(Addr address); {
assert(tid < numThreads);
return &addressMonitor[tid];
}
}; };
#endif // THE_ISA == NULL_ISA #endif // THE_ISA == NULL_ISA

View file

@ -863,11 +863,12 @@ class BaseDynInst : public ExecContext, public RefCounted
public: public:
// monitor/mwait functions // monitor/mwait functions
void armMonitor(Addr address) { cpu->armMonitor(address); } void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); } bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
void mwaitAtomic(ThreadContext *tc) void mwaitAtomic(ThreadContext *tc)
{ return cpu->mwaitAtomic(tc, cpu->dtb); } { return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); } AddressMonitor *getAddrMonitor()
{ return cpu->getCpuAddrMonitor(threadNumber); }
}; };
template<class Impl> template<class Impl>

View file

@ -350,11 +350,11 @@ class CheckerCPU : public BaseCPU, public ExecContext
} }
// monitor/mwait functions // monitor/mwait functions
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); } virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); } bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc) void mwaitAtomic(ThreadContext *tc)
{ return BaseCPU::mwaitAtomic(tc, thread->dtb); } { return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); } AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
void demapInstPage(Addr vaddr, uint64_t asn) void demapInstPage(Addr vaddr, uint64_t asn)
{ {

View file

@ -343,12 +343,12 @@ class ExecContext : public ::ExecContext
public: public:
// monitor/mwait functions // monitor/mwait functions
void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); } void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); } bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc) void mwaitAtomic(ThreadContext *tc)
{ return getCpuPtr()->mwaitAtomic(tc, thread.dtb); } { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
AddressMonitor *getAddrMonitor() AddressMonitor *getAddrMonitor()
{ return getCpuPtr()->getCpuAddrMonitor(); } { return getCpuPtr()->getCpuAddrMonitor(0); }
}; };
} }

View file

@ -135,7 +135,8 @@ Fetch1::fetchLine()
"%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n", "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
request_id, aligned_pc, pc, line_offset, request_size); request_id, aligned_pc, pc, line_offset, request_size);
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
/* thread id */ 0);
request->request.setVirt(0 /* asid */, request->request.setVirt(0 /* asid */,
aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(), aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
/* I've no idea why we need the PC, but give it */ /* I've no idea why we need the PC, but give it */

View file

@ -1501,7 +1501,8 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
if (inst->traceData) if (inst->traceData)
inst->traceData->setMem(addr, size, flags); inst->traceData->setMem(addr, size, flags);
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
request->request.setThreadContext(cid, /* thread id */ 0);
request->request.setVirt(0 /* asid */, request->request.setVirt(0 /* asid */,
addr, size, flags, cpu.dataMasterId(), addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */ /* I've no idea why we need the PC, but give it */

View file

@ -118,9 +118,10 @@ template <class Impl>
void void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt) FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{ {
// X86 ISA: Snooping an invalidation for monitor/mwait for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup(); cpu->wakeup();
}
} }
lsq->recvTimingSnoopReq(pkt); lsq->recvTimingSnoopReq(pkt);
} }

View file

@ -86,9 +86,10 @@ AtomicSimpleCPU::init()
{ {
BaseSimpleCPU::init(); BaseSimpleCPU::init();
ifetch_req.setThreadContext(_cpuId, 0); int cid = threadContexts[0]->contextId();
data_read_req.setThreadContext(_cpuId, 0); ifetch_req.setThreadContext(cid, 0);
data_write_req.setThreadContext(_cpuId, 0); data_read_req.setThreadContext(cid, 0);
data_write_req.setThreadContext(cid, 0);
} }
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@ -130,6 +131,24 @@ AtomicSimpleCPU::drain()
} }
} }
/**
 * Snoop a packet on behalf of the other thread contexts local to this CPU.
 *
 * For every thread ID below numThreads other than @p sender, the thread's
 * address monitor is checked against @p pkt and wakeup() is called when
 * doMonitor() returns true; afterwards the ISA's handleLockedSnoop() is
 * invoked for that thread.
 *
 * @param pkt    Packet to present to the other threads.
 * @param sender Thread ID that is skipped by the loop.
 */
void
AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    for (ThreadID other = 0; other < numThreads; other++) {
        // The sending thread is not snooped.
        if (other == sender)
            continue;

        // Monitor check comes first, then the locked-snoop handling.
        const bool monitor_hit = getCpuAddrMonitor(other)->doMonitor(pkt);
        if (monitor_hit)
            wakeup();

        TheISA::handleLockedSnoop(threadInfo[other]->thread, pkt,
                                  dcachePort.cacheBlockMask);
    }
}
void void
AtomicSimpleCPU::drainResume() AtomicSimpleCPU::drainResume()
{ {
@ -265,8 +284,11 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait // X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
cpu->wakeup(); for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
} }
// if snoop invalidates, release any associated locks // if snoop invalidates, release any associated locks
@ -289,8 +311,10 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait // X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
cpu->wakeup(); if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
} }
// if snoop invalidates, release any associated locks // if snoop invalidates, release any associated locks
@ -460,6 +484,9 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
system->getPhysMem().access(&pkt); system->getPhysMem().access(&pkt);
else else
dcache_latency += dcachePort.sendAtomic(&pkt); dcache_latency += dcachePort.sendAtomic(&pkt);
// Notify other threads on this CPU of write
threadSnoop(&pkt, curThread);
} }
dcache_access = true; dcache_access = true;
assert(!pkt.isError()); assert(!pkt.isError());
@ -516,9 +543,11 @@ AtomicSimpleCPU::tick()
// Set memory request IDs to current thread // Set memory request IDs to current thread
if (numThreads > 1) { if (numThreads > 1) {
ifetch_req.setThreadContext(_cpuId, curThread); ContextID cid = threadContexts[curThread]->contextId();
data_read_req.setThreadContext(_cpuId, curThread);
data_write_req.setThreadContext(_cpuId, curThread); ifetch_req.setThreadContext(cid, curThread);
data_read_req.setThreadContext(cid, curThread);
data_write_req.setThreadContext(cid, curThread);
} }
SimpleExecContext& t_info = *threadInfo[curThread]; SimpleExecContext& t_info = *threadInfo[curThread];

View file

@ -186,6 +186,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
/** Return a reference to the instruction port. */ /** Return a reference to the instruction port. */
virtual MasterPort &getInstPort() { return icachePort; } virtual MasterPort &getInstPort() { return icachePort; }
/** Perform snoop for other cpu-local thread contexts. */
void threadSnoop(PacketPtr pkt, ThreadID sender);
public: public:
DrainState drain() M5_ATTR_OVERRIDE; DrainState drain() M5_ATTR_OVERRIDE;

View file

@ -418,9 +418,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
void void
BaseSimpleCPU::wakeup() BaseSimpleCPU::wakeup()
{ {
getCpuAddrMonitor()->gotWakeup = true;
for (ThreadID tid = 0; tid < numThreads; tid++) { for (ThreadID tid = 0; tid < numThreads; tid++) {
getCpuAddrMonitor(tid)->gotWakeup = true;
if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) { if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
DPRINTF(Quiesce,"Suspended Processor awoke\n"); DPRINTF(Quiesce,"Suspended Processor awoke\n");
threadInfo[tid]->thread->activate(); threadInfo[tid]->thread->activate();

View file

@ -376,22 +376,22 @@ class SimpleExecContext : public ExecContext {
void armMonitor(Addr address) M5_ATTR_OVERRIDE void armMonitor(Addr address) M5_ATTR_OVERRIDE
{ {
cpu->armMonitor(address); cpu->armMonitor(thread->threadId(), address);
} }
bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
{ {
return cpu->mwait(pkt); return cpu->mwait(thread->threadId(), pkt);
} }
void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
{ {
cpu->mwaitAtomic(tc, thread->dtb); cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
} }
AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
{ {
return cpu->getCpuAddrMonitor(); return cpu->getCpuAddrMonitor(thread->threadId());
} }
#if THE_ISA == MIPS_ISA #if THE_ISA == MIPS_ISA

View file

@ -302,6 +302,7 @@ TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
if (do_access) { if (do_access) {
dcache_pkt = pkt; dcache_pkt = pkt;
handleWritePacket(); handleWritePacket();
threadSnoop(pkt, curThread);
} else { } else {
_status = DcacheWaitResponse; _status = DcacheWaitResponse;
completeDataAccess(pkt); completeDataAccess(pkt);
@ -538,6 +539,19 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
return NoFault; return NoFault;
} }
/**
 * Snoop a packet on behalf of the other thread contexts local to this CPU.
 *
 * For every thread ID below numThreads other than @p sender, the thread's
 * address monitor is checked against @p pkt and wakeup() is called when
 * doMonitor() returns true; afterwards the ISA's handleLockedSnoop() is
 * invoked for that thread.
 *
 * @param pkt    Packet to present to the other threads.
 * @param sender Thread ID that is skipped by the loop.
 */
void
TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    for (ThreadID other = 0; other < numThreads; other++) {
        // The sending thread is not snooped.
        if (other == sender)
            continue;

        // Monitor check comes first, then the locked-snoop handling.
        const bool monitor_hit = getCpuAddrMonitor(other)->doMonitor(pkt);
        if (monitor_hit)
            wakeup();

        TheISA::handleLockedSnoop(threadInfo[other]->thread, pkt,
                                  dcachePort.cacheBlockMask);
    }
}
void void
TimingSimpleCPU::finishTranslation(WholeTranslationState *state) TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
@ -849,9 +863,10 @@ TimingSimpleCPU::updateCycleCounts()
void void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{ {
// X86 ISA: Snooping an invalidation for monitor/mwait for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup(); cpu->wakeup();
}
} }
for (auto &t_info : cpu->threadInfo) { for (auto &t_info : cpu->threadInfo) {
@ -862,9 +877,10 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
void void
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt) TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
{ {
// X86 ISA: Snooping an invalidation for monitor/mwait for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup(); cpu->wakeup();
}
} }
} }

View file

@ -132,6 +132,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
}; };
FetchTranslation fetchTranslation; FetchTranslation fetchTranslation;
void threadSnoop(PacketPtr pkt, ThreadID sender);
void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read); void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req, void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
uint8_t *data, bool read); uint8_t *data, bool read);