cpu: Add per-thread monitors

Adds per-thread address monitors to support FullSystem SMT.
This commit is contained in:
Mitch Hayenga 2015-09-30 11:14:19 -05:00
parent 582a0148b4
commit fafa83ed32
14 changed files with 128 additions and 64 deletions

View file

@ -133,7 +133,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
numThreads(p->numThreads), system(p->system),
functionTraceStream(nullptr), currentFunctionStart(0),
currentFunctionEnd(0), functionEntryTick(0),
addressMonitor()
addressMonitor(p->numThreads)
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
@ -271,39 +271,48 @@ BaseCPU::~BaseCPU()
}
void
BaseCPU::armMonitor(Addr address)
BaseCPU::armMonitor(ThreadID tid, Addr address)
{
addressMonitor.armed = true;
addressMonitor.vAddr = address;
addressMonitor.pAddr = 0x0;
DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
monitor.armed = true;
monitor.vAddr = address;
monitor.pAddr = 0x0;
DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
}
bool
BaseCPU::mwait(PacketPtr pkt)
BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
{
if(addressMonitor.gotWakeup == false) {
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
if(monitor.gotWakeup == false) {
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
assert(pkt->req->hasPaddr());
addressMonitor.pAddr = pkt->getAddr() & mask;
addressMonitor.waiting = true;
monitor.pAddr = pkt->getAddr() & mask;
monitor.waiting = true;
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
addressMonitor.vAddr, addressMonitor.pAddr);
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
"line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
return true;
} else {
addressMonitor.gotWakeup = false;
monitor.gotWakeup = false;
return false;
}
}
void
BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
{
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
Request req;
Addr addr = addressMonitor.vAddr;
Addr addr = monitor.vAddr;
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
int size = block_size;
@ -320,11 +329,11 @@ BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
assert(fault == NoFault);
addressMonitor.pAddr = req.getPaddr() & mask;
addressMonitor.waiting = true;
monitor.pAddr = req.getPaddr() & mask;
monitor.waiting = true;
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
addressMonitor.vAddr, addressMonitor.pAddr);
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
tid, monitor.vAddr, monitor.pAddr);
}
void

View file

@ -559,14 +559,17 @@ class BaseCPU : public MemObject
Stats::Scalar numWorkItemsCompleted;
private:
AddressMonitor addressMonitor;
std::vector<AddressMonitor> addressMonitor;
public:
void armMonitor(Addr address);
bool mwait(PacketPtr pkt);
void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
void atomicNotify(Addr address);
void armMonitor(ThreadID tid, Addr address);
bool mwait(ThreadID tid, PacketPtr pkt);
void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
/**
 * Look up the address monitor that belongs to a single hardware thread.
 *
 * @param tid Thread whose monitor is requested; asserted to be below
 *            numThreads.
 * @return Pointer to that thread's entry in addressMonitor.
 */
AddressMonitor *
getCpuAddrMonitor(ThreadID tid)
{
    assert(tid < numThreads);
    AddressMonitor &monitor = addressMonitor[tid];
    return &monitor;
}
};
#endif // THE_ISA == NULL_ISA

View file

@ -863,11 +863,12 @@ class BaseDynInst : public ExecContext, public RefCounted
public:
// monitor/mwait functions
void armMonitor(Addr address) { cpu->armMonitor(address); }
bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
void mwaitAtomic(ThreadContext *tc)
{ return cpu->mwaitAtomic(tc, cpu->dtb); }
AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
{ return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
AddressMonitor *getAddrMonitor()
{ return cpu->getCpuAddrMonitor(threadNumber); }
};
template<class Impl>

View file

@ -350,11 +350,11 @@ class CheckerCPU : public BaseCPU, public ExecContext
}
// monitor/mwait functions
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc)
{ return BaseCPU::mwaitAtomic(tc, thread->dtb); }
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
{ return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
void demapInstPage(Addr vaddr, uint64_t asn)
{

View file

@ -343,12 +343,12 @@ class ExecContext : public ::ExecContext
public:
// monitor/mwait functions
void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
void mwaitAtomic(ThreadContext *tc)
{ return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
{ return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
AddressMonitor *getAddrMonitor()
{ return getCpuPtr()->getCpuAddrMonitor(); }
{ return getCpuPtr()->getCpuAddrMonitor(0); }
};
}

View file

@ -135,7 +135,8 @@ Fetch1::fetchLine()
"%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
request_id, aligned_pc, pc, line_offset, request_size);
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
/* thread id */ 0);
request->request.setVirt(0 /* asid */,
aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
/* I've no idea why we need the PC, but give it */

View file

@ -1501,7 +1501,8 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
if (inst->traceData)
inst->traceData->setMem(addr, size, flags);
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
request->request.setThreadContext(cid, /* thread id */ 0);
request->request.setVirt(0 /* asid */,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */

View file

@ -118,10 +118,11 @@ template <class Impl>
void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
// X86 ISA: Snooping an invalidation for monitor/mwait
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
}
lsq->recvTimingSnoopReq(pkt);
}

View file

@ -86,9 +86,10 @@ AtomicSimpleCPU::init()
{
BaseSimpleCPU::init();
ifetch_req.setThreadContext(_cpuId, 0);
data_read_req.setThreadContext(_cpuId, 0);
data_write_req.setThreadContext(_cpuId, 0);
int cid = threadContexts[0]->contextId();
ifetch_req.setThreadContext(cid, 0);
data_read_req.setThreadContext(cid, 0);
data_write_req.setThreadContext(cid, 0);
}
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@ -130,6 +131,24 @@ AtomicSimpleCPU::drain()
}
}
/**
 * Present a packet issued by one thread of this CPU to every other
 * thread of the same CPU.
 *
 * For each thread other than the sender, the thread's address monitor is
 * checked against the packet with doMonitor(); when that check returns
 * true, wakeup() is called. The packet is then handed to
 * TheISA::handleLockedSnoop() for that thread together with the data
 * port's cacheBlockMask.
 *
 * @param pkt    Packet being snooped.
 * @param sender Thread that issued the packet; it is skipped.
 */
void
AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    for (ThreadID other = 0; other < numThreads; other++) {
        // The issuing thread does not snoop its own access.
        if (other == sender)
            continue;

        if (getCpuAddrMonitor(other)->doMonitor(pkt))
            wakeup();

        TheISA::handleLockedSnoop(threadInfo[other]->thread, pkt,
                                  dcachePort.cacheBlockMask);
    }
}
void
AtomicSimpleCPU::drainResume()
{
@ -265,9 +284,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
}
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
@ -289,9 +311,11 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
}
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
@ -460,6 +484,9 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
system->getPhysMem().access(&pkt);
else
dcache_latency += dcachePort.sendAtomic(&pkt);
// Notify other threads on this CPU of write
threadSnoop(&pkt, curThread);
}
dcache_access = true;
assert(!pkt.isError());
@ -516,9 +543,11 @@ AtomicSimpleCPU::tick()
// Set memory request ids to current thread
if (numThreads > 1) {
ifetch_req.setThreadContext(_cpuId, curThread);
data_read_req.setThreadContext(_cpuId, curThread);
data_write_req.setThreadContext(_cpuId, curThread);
ContextID cid = threadContexts[curThread]->contextId();
ifetch_req.setThreadContext(cid, curThread);
data_read_req.setThreadContext(cid, curThread);
data_write_req.setThreadContext(cid, curThread);
}
SimpleExecContext& t_info = *threadInfo[curThread];

View file

@ -186,6 +186,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
/** Return a reference to the instruction port. */
virtual MasterPort &getInstPort() { return icachePort; }
/** Perform snoop for other cpu-local thread contexts. */
void threadSnoop(PacketPtr pkt, ThreadID sender);
public:
DrainState drain() M5_ATTR_OVERRIDE;

View file

@ -418,9 +418,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
void
BaseSimpleCPU::wakeup()
{
getCpuAddrMonitor()->gotWakeup = true;
for (ThreadID tid = 0; tid < numThreads; tid++) {
getCpuAddrMonitor(tid)->gotWakeup = true;
if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
DPRINTF(Quiesce,"Suspended Processor awoke\n");
threadInfo[tid]->thread->activate();

View file

@ -376,22 +376,22 @@ class SimpleExecContext : public ExecContext {
void armMonitor(Addr address) M5_ATTR_OVERRIDE
{
cpu->armMonitor(address);
cpu->armMonitor(thread->threadId(), address);
}
bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
{
return cpu->mwait(pkt);
return cpu->mwait(thread->threadId(), pkt);
}
void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
{
cpu->mwaitAtomic(tc, thread->dtb);
cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
}
AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
{
return cpu->getCpuAddrMonitor();
return cpu->getCpuAddrMonitor(thread->threadId());
}
#if THE_ISA == MIPS_ISA

View file

@ -302,6 +302,7 @@ TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
if (do_access) {
dcache_pkt = pkt;
handleWritePacket();
threadSnoop(pkt, curThread);
} else {
_status = DcacheWaitResponse;
completeDataAccess(pkt);
@ -538,6 +539,19 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
return NoFault;
}
/**
 * Present a packet issued by one thread of this CPU to every other
 * thread of the same CPU.
 *
 * For each thread other than the sender, the thread's address monitor is
 * checked against the packet with doMonitor(); when that check returns
 * true, wakeup() is called. The packet is then handed to
 * TheISA::handleLockedSnoop() for that thread together with the data
 * port's cacheBlockMask.
 *
 * @param pkt    Packet being snooped.
 * @param sender Thread that issued the packet; it is skipped.
 */
void
TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    for (ThreadID other = 0; other < numThreads; other++) {
        // The issuing thread does not snoop its own access.
        if (other == sender)
            continue;

        if (getCpuAddrMonitor(other)->doMonitor(pkt))
            wakeup();

        TheISA::handleLockedSnoop(threadInfo[other]->thread, pkt,
                                  dcachePort.cacheBlockMask);
    }
}
void
TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
@ -849,10 +863,11 @@ TimingSimpleCPU::updateCycleCounts()
void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
// X86 ISA: Snooping an invalidation for monitor/mwait
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
}
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
@ -862,10 +877,11 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
void
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
{
// X86 ISA: Snooping an invalidation for monitor/mwait
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
cpu->wakeup();
}
}
}
bool

View file

@ -132,6 +132,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
};
FetchTranslation fetchTranslation;
void threadSnoop(PacketPtr pkt, ThreadID sender);
void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
uint8_t *data, bool read);