cpu: Add per-thread monitors
Adds per-thread address monitors to support FullSystem SMT.
This commit is contained in:
parent
582a0148b4
commit
fafa83ed32
14 changed files with 128 additions and 64 deletions
|
@ -133,7 +133,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
|
|||
numThreads(p->numThreads), system(p->system),
|
||||
functionTraceStream(nullptr), currentFunctionStart(0),
|
||||
currentFunctionEnd(0), functionEntryTick(0),
|
||||
addressMonitor()
|
||||
addressMonitor(p->numThreads)
|
||||
{
|
||||
// if Python did not provide a valid ID, do it here
|
||||
if (_cpuId == -1 ) {
|
||||
|
@ -271,39 +271,48 @@ BaseCPU::~BaseCPU()
|
|||
}
|
||||
|
||||
void
|
||||
BaseCPU::armMonitor(Addr address)
|
||||
BaseCPU::armMonitor(ThreadID tid, Addr address)
|
||||
{
|
||||
addressMonitor.armed = true;
|
||||
addressMonitor.vAddr = address;
|
||||
addressMonitor.pAddr = 0x0;
|
||||
DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
|
||||
assert(tid < numThreads);
|
||||
AddressMonitor &monitor = addressMonitor[tid];
|
||||
|
||||
monitor.armed = true;
|
||||
monitor.vAddr = address;
|
||||
monitor.pAddr = 0x0;
|
||||
DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
|
||||
}
|
||||
|
||||
bool
|
||||
BaseCPU::mwait(PacketPtr pkt)
|
||||
BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
|
||||
{
|
||||
if(addressMonitor.gotWakeup == false) {
|
||||
assert(tid < numThreads);
|
||||
AddressMonitor &monitor = addressMonitor[tid];
|
||||
|
||||
if(monitor.gotWakeup == false) {
|
||||
int block_size = cacheLineSize();
|
||||
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||
|
||||
assert(pkt->req->hasPaddr());
|
||||
addressMonitor.pAddr = pkt->getAddr() & mask;
|
||||
addressMonitor.waiting = true;
|
||||
monitor.pAddr = pkt->getAddr() & mask;
|
||||
monitor.waiting = true;
|
||||
|
||||
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
|
||||
addressMonitor.vAddr, addressMonitor.pAddr);
|
||||
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
|
||||
"line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
|
||||
return true;
|
||||
} else {
|
||||
addressMonitor.gotWakeup = false;
|
||||
monitor.gotWakeup = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
|
||||
BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
|
||||
{
|
||||
assert(tid < numThreads);
|
||||
AddressMonitor &monitor = addressMonitor[tid];
|
||||
|
||||
Request req;
|
||||
Addr addr = addressMonitor.vAddr;
|
||||
Addr addr = monitor.vAddr;
|
||||
int block_size = cacheLineSize();
|
||||
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||
int size = block_size;
|
||||
|
@ -320,11 +329,11 @@ BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
|
|||
Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
|
||||
assert(fault == NoFault);
|
||||
|
||||
addressMonitor.pAddr = req.getPaddr() & mask;
|
||||
addressMonitor.waiting = true;
|
||||
monitor.pAddr = req.getPaddr() & mask;
|
||||
monitor.waiting = true;
|
||||
|
||||
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
|
||||
addressMonitor.vAddr, addressMonitor.pAddr);
|
||||
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
|
||||
tid, monitor.vAddr, monitor.pAddr);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -559,14 +559,17 @@ class BaseCPU : public MemObject
|
|||
Stats::Scalar numWorkItemsCompleted;
|
||||
|
||||
private:
|
||||
AddressMonitor addressMonitor;
|
||||
std::vector<AddressMonitor> addressMonitor;
|
||||
|
||||
public:
|
||||
void armMonitor(Addr address);
|
||||
bool mwait(PacketPtr pkt);
|
||||
void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
|
||||
AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
|
||||
void atomicNotify(Addr address);
|
||||
void armMonitor(ThreadID tid, Addr address);
|
||||
bool mwait(ThreadID tid, PacketPtr pkt);
|
||||
void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
|
||||
AddressMonitor *getCpuAddrMonitor(ThreadID tid)
|
||||
{
|
||||
assert(tid < numThreads);
|
||||
return &addressMonitor[tid];
|
||||
}
|
||||
};
|
||||
|
||||
#endif // THE_ISA == NULL_ISA
|
||||
|
|
|
@ -863,11 +863,12 @@ class BaseDynInst : public ExecContext, public RefCounted
|
|||
|
||||
public:
|
||||
// monitor/mwait functions
|
||||
void armMonitor(Addr address) { cpu->armMonitor(address); }
|
||||
bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
|
||||
void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
|
||||
bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
|
||||
void mwaitAtomic(ThreadContext *tc)
|
||||
{ return cpu->mwaitAtomic(tc, cpu->dtb); }
|
||||
AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
|
||||
{ return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
|
||||
AddressMonitor *getAddrMonitor()
|
||||
{ return cpu->getCpuAddrMonitor(threadNumber); }
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
|
|
|
@ -350,11 +350,11 @@ class CheckerCPU : public BaseCPU, public ExecContext
|
|||
}
|
||||
|
||||
// monitor/mwait functions
|
||||
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
|
||||
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
|
||||
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
|
||||
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
|
||||
void mwaitAtomic(ThreadContext *tc)
|
||||
{ return BaseCPU::mwaitAtomic(tc, thread->dtb); }
|
||||
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
|
||||
{ return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
|
||||
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
|
||||
|
||||
void demapInstPage(Addr vaddr, uint64_t asn)
|
||||
{
|
||||
|
|
|
@ -343,12 +343,12 @@ class ExecContext : public ::ExecContext
|
|||
|
||||
public:
|
||||
// monitor/mwait functions
|
||||
void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
|
||||
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
|
||||
void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
|
||||
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
|
||||
void mwaitAtomic(ThreadContext *tc)
|
||||
{ return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
|
||||
{ return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
|
||||
AddressMonitor *getAddrMonitor()
|
||||
{ return getCpuPtr()->getCpuAddrMonitor(); }
|
||||
{ return getCpuPtr()->getCpuAddrMonitor(0); }
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -135,7 +135,8 @@ Fetch1::fetchLine()
|
|||
"%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
|
||||
request_id, aligned_pc, pc, line_offset, request_size);
|
||||
|
||||
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
|
||||
request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
|
||||
/* thread id */ 0);
|
||||
request->request.setVirt(0 /* asid */,
|
||||
aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
|
||||
/* I've no idea why we need the PC, but give it */
|
||||
|
|
|
@ -1501,7 +1501,8 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
|
|||
if (inst->traceData)
|
||||
inst->traceData->setMem(addr, size, flags);
|
||||
|
||||
request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
|
||||
int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
|
||||
request->request.setThreadContext(cid, /* thread id */ 0);
|
||||
request->request.setVirt(0 /* asid */,
|
||||
addr, size, flags, cpu.dataMasterId(),
|
||||
/* I've no idea why we need the PC, but give it */
|
||||
|
|
|
@ -118,10 +118,11 @@ template <class Impl>
|
|||
void
|
||||
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
||||
{
|
||||
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
|
||||
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
cpu->wakeup();
|
||||
}
|
||||
}
|
||||
lsq->recvTimingSnoopReq(pkt);
|
||||
}
|
||||
|
||||
|
|
|
@ -86,9 +86,10 @@ AtomicSimpleCPU::init()
|
|||
{
|
||||
BaseSimpleCPU::init();
|
||||
|
||||
ifetch_req.setThreadContext(_cpuId, 0);
|
||||
data_read_req.setThreadContext(_cpuId, 0);
|
||||
data_write_req.setThreadContext(_cpuId, 0);
|
||||
int cid = threadContexts[0]->contextId();
|
||||
ifetch_req.setThreadContext(cid, 0);
|
||||
data_read_req.setThreadContext(cid, 0);
|
||||
data_write_req.setThreadContext(cid, 0);
|
||||
}
|
||||
|
||||
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
|
||||
|
@ -130,6 +131,24 @@ AtomicSimpleCPU::drain()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
|
||||
{
|
||||
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
if (tid != sender) {
|
||||
if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
wakeup();
|
||||
}
|
||||
|
||||
TheISA::handleLockedSnoop(threadInfo[tid]->thread,
|
||||
pkt, dcachePort.cacheBlockMask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
AtomicSimpleCPU::drainResume()
|
||||
{
|
||||
|
@ -265,9 +284,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
|
|||
|
||||
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
|
||||
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||
|
||||
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
|
||||
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
cpu->wakeup();
|
||||
}
|
||||
}
|
||||
|
||||
// if snoop invalidates, release any associated locks
|
||||
if (pkt->isInvalidate()) {
|
||||
|
@ -289,9 +311,11 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
|
|||
|
||||
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
|
||||
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
|
||||
if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
cpu->wakeup();
|
||||
}
|
||||
}
|
||||
|
||||
// if snoop invalidates, release any associated locks
|
||||
if (pkt->isInvalidate()) {
|
||||
|
@ -460,6 +484,9 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
|
|||
system->getPhysMem().access(&pkt);
|
||||
else
|
||||
dcache_latency += dcachePort.sendAtomic(&pkt);
|
||||
|
||||
// Notify other threads on this CPU of write
|
||||
threadSnoop(&pkt, curThread);
|
||||
}
|
||||
dcache_access = true;
|
||||
assert(!pkt.isError());
|
||||
|
@ -516,9 +543,11 @@ AtomicSimpleCPU::tick()
|
|||
|
||||
// Set memory request ids to current thread
|
||||
if (numThreads > 1) {
|
||||
ifetch_req.setThreadContext(_cpuId, curThread);
|
||||
data_read_req.setThreadContext(_cpuId, curThread);
|
||||
data_write_req.setThreadContext(_cpuId, curThread);
|
||||
ContextID cid = threadContexts[curThread]->contextId();
|
||||
|
||||
ifetch_req.setThreadContext(cid, curThread);
|
||||
data_read_req.setThreadContext(cid, curThread);
|
||||
data_write_req.setThreadContext(cid, curThread);
|
||||
}
|
||||
|
||||
SimpleExecContext& t_info = *threadInfo[curThread];
|
||||
|
|
|
@ -186,6 +186,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
|
|||
/** Return a reference to the instruction port. */
|
||||
virtual MasterPort &getInstPort() { return icachePort; }
|
||||
|
||||
/** Perform snoop for other cpu-local thread contexts. */
|
||||
void threadSnoop(PacketPtr pkt, ThreadID sender);
|
||||
|
||||
public:
|
||||
|
||||
DrainState drain() M5_ATTR_OVERRIDE;
|
||||
|
|
|
@ -418,9 +418,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
|
|||
void
|
||||
BaseSimpleCPU::wakeup()
|
||||
{
|
||||
getCpuAddrMonitor()->gotWakeup = true;
|
||||
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
getCpuAddrMonitor(tid)->gotWakeup = true;
|
||||
if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
|
||||
DPRINTF(Quiesce,"Suspended Processor awoke\n");
|
||||
threadInfo[tid]->thread->activate();
|
||||
|
|
|
@ -376,22 +376,22 @@ class SimpleExecContext : public ExecContext {
|
|||
|
||||
void armMonitor(Addr address) M5_ATTR_OVERRIDE
|
||||
{
|
||||
cpu->armMonitor(address);
|
||||
cpu->armMonitor(thread->threadId(), address);
|
||||
}
|
||||
|
||||
bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
|
||||
{
|
||||
return cpu->mwait(pkt);
|
||||
return cpu->mwait(thread->threadId(), pkt);
|
||||
}
|
||||
|
||||
void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
|
||||
{
|
||||
cpu->mwaitAtomic(tc, thread->dtb);
|
||||
cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
|
||||
}
|
||||
|
||||
AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
|
||||
{
|
||||
return cpu->getCpuAddrMonitor();
|
||||
return cpu->getCpuAddrMonitor(thread->threadId());
|
||||
}
|
||||
|
||||
#if THE_ISA == MIPS_ISA
|
||||
|
|
|
@ -302,6 +302,7 @@ TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
|
|||
if (do_access) {
|
||||
dcache_pkt = pkt;
|
||||
handleWritePacket();
|
||||
threadSnoop(pkt, curThread);
|
||||
} else {
|
||||
_status = DcacheWaitResponse;
|
||||
completeDataAccess(pkt);
|
||||
|
@ -538,6 +539,19 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
|
|||
return NoFault;
|
||||
}
|
||||
|
||||
void
|
||||
TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
if (tid != sender) {
|
||||
if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
wakeup();
|
||||
}
|
||||
TheISA::handleLockedSnoop(threadInfo[tid]->thread, pkt,
|
||||
dcachePort.cacheBlockMask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
|
||||
|
@ -849,10 +863,11 @@ TimingSimpleCPU::updateCycleCounts()
|
|||
void
|
||||
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
||||
{
|
||||
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
|
||||
if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
cpu->wakeup();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &t_info : cpu->threadInfo) {
|
||||
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
|
||||
|
@ -862,10 +877,11 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
|||
void
|
||||
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
|
||||
{
|
||||
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||
for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
|
||||
if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
|
||||
cpu->wakeup();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -132,6 +132,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
|
|||
};
|
||||
FetchTranslation fetchTranslation;
|
||||
|
||||
void threadSnoop(PacketPtr pkt, ThreadID sender);
|
||||
void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
|
||||
void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
|
||||
uint8_t *data, bool read);
|
||||
|
|
Loading…
Reference in a new issue