diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index f9ded25f1..f5a2ddfbe 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -34,6 +34,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the L1 Cache latency is only used by the sequencer on fast path hits @@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j, Icache = l0i_cache, Dcache = l0d_cache, - send_evictions = (options.cpu_type == "detailed"), + send_evictions = send_evicts(options), clk_domain=system.cpu[i].clk_domain, ruby_system = ruby_system) diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index b7bdd1447..d911d76ef 100644 --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -32,6 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the L1 Cache latency is only used by the sequencer on fast path hits @@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): L1Icache = l1i_cache, L1Dcache = l1d_cache, l2_select_num_bits = l2_bits, - send_evictions = ( - options.cpu_type == "detailed"), + send_evictions = send_evicts(options), prefetcher = prefetcher, ruby_system = ruby_system, clk_domain=system.cpu[i].clk_domain, diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py index 2dd064b55..708e111e6 100644 --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -32,6 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the cache latency is only used by the sequencer on fast path hits @@ -79,8 +80,7 @@ def create_system(options, full_system, 
system, dma_ports, ruby_system): # l1_cntrl = L1Cache_Controller(version = i, cacheMemory = cache, - send_evictions = ( - options.cpu_type == "detailed"), + send_evictions = send_evicts(options), transitions_per_cycle = options.ports, clk_domain=system.cpu[i].clk_domain, ruby_system = ruby_system) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py index 9c4bab434..14ba33698 100644 --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -32,6 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the L1 Cache latency is only used by the sequencer on fast path hits @@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): L1Icache = l1i_cache, L1Dcache = l1d_cache, l2_select_num_bits = l2_bits, - send_evictions = ( - options.cpu_type == "detailed"), + send_evictions = send_evicts(options), transitions_per_cycle = options.ports, clk_domain=system.cpu[i].clk_domain, ruby_system = ruby_system) diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py index 26cd625b5..42759b092 100644 --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -32,6 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the L1 Cache latency is only used by the sequencer on fast path hits @@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): not options.disable_dyn_timeouts, no_mig_atomic = not \ options.allow_atomic_migration, - send_evictions = ( - options.cpu_type == "detailed"), + send_evictions = send_evicts(options), transitions_per_cycle = options.ports, clk_domain=system.cpu[i].clk_domain, ruby_system = ruby_system) diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py index 740c6783e..571a645a6 100644 
--- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -32,6 +32,7 @@ import m5 from m5.objects import * from m5.defines import buildEnv from Ruby import create_topology +from Ruby import send_evicts # # Note: the L1 Cache latency is only used by the sequencer on fast path hits @@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system): L2cache = l2_cache, no_mig_atomic = not \ options.allow_atomic_migration, - send_evictions = ( - options.cpu_type == "detailed"), + send_evictions = send_evicts(options), transitions_per_cycle = options.ports, clk_domain=system.cpu[i].clk_domain, ruby_system = ruby_system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index b99e251d3..44d6bdfcc 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []): ruby.num_of_sequencers = len(cpu_sequencers) ruby.random_seed = options.random_seed +def send_evicts(options): + # currently, 2 scenarios warrant forwarding evictions to the CPU: + # 1. The O3 model must keep the LSQ coherent with the caches + # 2. 
The x86 mwait instruction is built on top of coherence invalidations. NOTE(review): this def is inserted inside create_system, so the backing-store code after it looks unreachable -- TODO confirm placement + if options.cpu_type == "detailed" or buildEnv['TARGET_ISA'] == 'x86': + return True + return False + # Create a backing copy of physical memory in case required if options.access_backing_store: ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size), diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index eb395fce2..081bad971 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -71,8 +71,20 @@ } 0x1: decode MODRM_MOD { 0x3: decode MODRM_RM { - 0x0: monitor(); - 0x1: mwait(); + 0x0: MonitorInst::monitor({{ + xc->armMonitor(Rax); + }}); + 0x1: MwaitInst::mwait({{ + uint64_t m = 0; //mem + unsigned s = 0x8; //size + unsigned f = 0; //flags + readMemAtomic(xc, traceData, + xc->getAddrMonitor()->vAddr, + m, s, f); + xc->mwaitAtomic(xc->tcBase()); + MicroHalt hltObj(machInst, mnemonic, 0x0); + hltObj.execute(xc, traceData); + }}); default: Inst::UD2(); } default: sidt_Ms(); diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa index cc0eb9acf..b5ffd4d59 100644 --- a/src/arch/x86/isa/formats/formats.isa +++ b/src/arch/x86/isa/formats/formats.isa @@ -45,6 +45,9 @@ //Include a format to generate a CPUID instruction. ##include "cpuid.isa" +//Include a format to generate monitor/mwait instructions. +##include "monitor_mwait.isa" + //Include the "unknown" format ##include "unknown.isa" diff --git a/src/arch/x86/isa/formats/monitor_mwait.isa b/src/arch/x86/isa/formats/monitor_mwait.isa new file mode 100644 index 000000000..493b7c58a --- /dev/null +++ b/src/arch/x86/isa/formats/monitor_mwait.isa @@ -0,0 +1,131 @@ +// Copyright (c) AMD +// All rights reserved. 
+// +// Authors: Marc Orr + +// Monitor Instruction + +output header {{ + class MonitorInst : public X86ISA::X86StaticInst + { + public: + static const RegIndex foldOBit = 0; + /// Constructor + MonitorInst(const char *_mnemonic, ExtMachInst _machInst, + OpClass __opClass) : + X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass) + { } + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string MonitorInst::generateDisassembly(Addr PC, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, mnemonic); + ccprintf(response, " "); + printReg(response, _srcRegIdx[0], machInst.opSize); + return response.str(); + } +}}; + +def format MonitorInst(code, *opt_flags) {{ + iop = InstObjParams(name, Name, 'MonitorInst', code, opt_flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) +}}; + + +// Mwait instruction + +// Declarations for execute() methods. +def template MwaitExecDeclare {{ + Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const; + Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const; + Fault completeAcc(PacketPtr, %(CPU_exec_context)s *, + Trace::InstRecord *) const; +}}; + +def template MwaitDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor. 
+ %(class_name)s(ExtMachInst machInst); + %(MwaitExecDeclare)s + }; +}}; + +def template MwaitInitiateAcc {{ + Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc, + Trace::InstRecord * traceData) const + { + uint64_t m = 0; //mem + unsigned s = 0x8; //size + unsigned f = 0; //flags + readMemTiming(xc, traceData, xc->getAddrMonitor()->vAddr, m, s, f); + return NoFault; + } +}}; + +def template MwaitCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc, + Trace::InstRecord *traceData) const + { + MicroHalt hltObj(machInst, mnemonic, 0x0); + if(xc->mwait(pkt)) { + hltObj.execute(xc, traceData); + } + return NoFault; + } +}}; + +output header {{ + class MwaitInst : public X86ISA::X86StaticInst + { + public: + static const RegIndex foldOBit = 0; + /// Constructor + MwaitInst(const char *_mnemonic, ExtMachInst _machInst, + OpClass __opClass) : + X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass) + { + flags[IsMemRef] = 1; + flags[IsLoad] = 1; + } + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string MwaitInst::generateDisassembly(Addr PC, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, mnemonic); + ccprintf(response, " "); + printReg(response, _srcRegIdx[0], machInst.opSize); + return response.str(); + } +}}; + +def format MwaitInst(code, *opt_flags) {{ + iop = InstObjParams(name, Name, 'MwaitInst', code, opt_flags) + header_output = MwaitDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) + exec_output += MwaitInitiateAcc.subst(iop) + exec_output += MwaitCompleteAcc.subst(iop) +}}; + diff --git a/src/cpu/SConscript b/src/cpu/SConscript index df29f6c73..570f5e2f1 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -102,6 +102,7 @@ DebugFlag('IntrControl') DebugFlag('O3PipeView') DebugFlag('PCEvent') 
DebugFlag('Quiesce') +DebugFlag('Mwait') CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr', 'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta', diff --git a/src/cpu/base.cc b/src/cpu/base.cc index ea4df2aa8..2f4745ee3 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -55,12 +55,14 @@ #include "base/misc.hh" #include "base/output.hh" #include "base/trace.hh" -#include "cpu/base.hh" #include "cpu/checker/cpu.hh" +#include "cpu/base.hh" #include "cpu/cpuevent.hh" #include "cpu/profile.hh" #include "cpu/thread_context.hh" +#include "debug/Mwait.hh" #include "debug/SyscallVerbose.hh" +#include "mem/page_table.hh" #include "params/BaseCPU.hh" #include "sim/full_system.hh" #include "sim/process.hh" @@ -123,7 +125,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker) _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid), _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()), interrupts(p->interrupts), profileEvent(NULL), - numThreads(p->numThreads), system(p->system) + numThreads(p->numThreads), system(p->system), + addressMonitor() { // if Python did not provide a valid ID, do it here if (_cpuId == -1 ) { @@ -260,6 +263,63 @@ BaseCPU::~BaseCPU() delete[] comInstEventQueue; } +void +BaseCPU::armMonitor(Addr address) +{ + addressMonitor.armed = true; + addressMonitor.vAddr = address; + addressMonitor.pAddr = 0x0; + DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address); +} + +bool +BaseCPU::mwait(PacketPtr pkt) +{ + if(addressMonitor.gotWakeup == false) { + int block_size = cacheLineSize(); + uint64_t mask = ~((uint64_t)(block_size - 1)); + + assert(pkt->req->hasPaddr()); + addressMonitor.pAddr = pkt->getAddr() & mask; + addressMonitor.waiting = true; + + DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n", + addressMonitor.vAddr, addressMonitor.pAddr); + return true; + } else { + addressMonitor.gotWakeup = false; + return false; + } +} + +void +BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb) 
+{ + Request req; + Addr addr = addressMonitor.vAddr; + int block_size = cacheLineSize(); + uint64_t mask = ~((uint64_t)(block_size - 1)); + int size = block_size; + + //The address of the next line if it crosses a cache line boundary. + Addr secondAddr = roundDown(addr + size - 1, block_size); + + if (secondAddr > addr) + size = secondAddr - addr; + + req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr()); + + // translate to physical address + Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read); + assert(fault == NoFault); + + addressMonitor.pAddr = req.getPaddr() & mask; + addressMonitor.waiting = true; + + DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n", + addressMonitor.vAddr, addressMonitor.pAddr); +} + void BaseCPU::init() { @@ -618,6 +678,25 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause) comInstEventQueue[tid]->schedule(event, now + insts); } +// Start disarmed with every field zeroed: mwait reads vAddr without checking that a monitor was armed. +AddressMonitor::AddressMonitor() + : armed(false), vAddr(0), pAddr(0), val(0), + waiting(false), gotWakeup(false) +{ } + +bool AddressMonitor::doMonitor(PacketPtr pkt) { + assert(pkt->req->hasPaddr()); + if(armed && waiting) { + if(pAddr == pkt->getAddr()) { + DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n", + pkt->getAddr()); + waiting = false; + return true; + } + } + return false; +} + void BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause) { diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 75c8f7263..3673a5f18 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -64,11 +64,26 @@ #include "sim/insttracer.hh" #include "sim/probe/pmu.hh" #include "sim/system.hh" +#include "debug/Mwait.hh" +class BaseCPU; struct BaseCPUParams; class CheckerCPU; class ThreadContext; +struct AddressMonitor +{ + AddressMonitor(); + bool doMonitor(PacketPtr pkt); + + bool armed; + Addr vAddr; + Addr pAddr; + uint64_t val; + bool waiting; // 0=normal, 1=mwaiting + bool gotWakeup; +}; + class CPUProgressEvent : public Event { protected: @@ -536,6 +551,16 @@ class
BaseCPU : public MemObject Stats::Scalar numCycles; Stats::Scalar numWorkItemsStarted; Stats::Scalar numWorkItemsCompleted; + + private: + AddressMonitor addressMonitor; + + public: + void armMonitor(Addr address); + bool mwait(PacketPtr pkt); + void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb); + AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; } + void atomicNotify(Addr address); }; #endif // THE_ISA == NULL_ISA diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 289627c9a..af4d238e2 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -853,6 +853,14 @@ class BaseDynInst : public ExecContext, public RefCounted /** Sets the number of consecutive store conditional failures. */ void setStCondFailures(unsigned int sc_failures) { thread->storeCondFailures = sc_failures; } + + public: + // monitor/mwait functions + void armMonitor(Addr address) { cpu->armMonitor(address); } + bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); } + void mwaitAtomic(ThreadContext *tc) + { return cpu->mwaitAtomic(tc, cpu->dtb); } + AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); } }; template diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index d684b142b..49f44ff00 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -349,6 +349,13 @@ class CheckerCPU : public BaseCPU, public ExecContext this->dtb->demapPage(vaddr, asn); } + // monitor/mwait functions + virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); } + bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); } + void mwaitAtomic(ThreadContext *tc) + { return BaseCPU::mwaitAtomic(tc, thread->dtb); } + AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); } + void demapInstPage(Addr vaddr, uint64_t asn) { this->itb->demapPage(vaddr, asn); diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index c85a746ac..c65841db2 100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@ 
-47,6 +47,7 @@ #include "arch/registers.hh" #include "base/types.hh" #include "config/the_isa.hh" +#include "cpu/base.hh" #include "cpu/static_inst_fwd.hh" #include "cpu/translation.hh" @@ -243,6 +244,10 @@ class ExecContext { * Invalidate a page in the DTLB and ITLB. */ virtual void demapPage(Addr vaddr, uint64_t asn) = 0; + virtual void armMonitor(Addr address) = 0; + virtual bool mwait(PacketPtr pkt) = 0; + virtual void mwaitAtomic(ThreadContext *tc) = 0; + virtual AddressMonitor *getAddrMonitor() = 0; /** @} */ diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 18281e636..c64cf9da4 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -602,3 +602,25 @@ InOrderDynInst::dump(std::string &outstring) outstring = s.str(); } + +void +InOrderDynInst::armMonitor(Addr address) { + cpu->armMonitor(address); +} + +bool +InOrderDynInst::mwait(PacketPtr pkt) { + return cpu->mwait(pkt); +} + +void +InOrderDynInst::mwaitAtomic(ThreadContext *tc) +{ + return cpu->mwaitAtomic(tc, cpu->getDTBPtr()); +} + +AddressMonitor * +InOrderDynInst::getAddrMonitor() +{ + return cpu->getCpuAddrMonitor(); +} diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 369ebe2f4..ebb7bf912 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -1077,6 +1077,13 @@ class InOrderDynInst : public ExecContext, public RefCounted void demapPage(Addr vaddr, uint64_t asn) { panic("demapPage unimplemented"); } + + public: + // monitor/mwait functions + void armMonitor(Addr address); + bool mwait(PacketPtr pkt); + void mwaitAtomic(ThreadContext *tc); + AddressMonitor *getAddrMonitor(); }; diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index f1143498e..41345d3bd 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -340,6 +340,15 @@ class ExecContext : public ::ExecContext - TheISA::Misc_Reg_Base, 
val); } } + + public: + // monitor/mwait functions + void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); } + bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); } + void mwaitAtomic(ThreadContext *tc) + { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); } + AddressMonitor *getAddrMonitor() + { return getCpuPtr()->getCpuAddrMonitor(); } }; } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index fd51cd123..55ef04ffc 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -117,6 +117,10 @@ template void FullO3CPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) { + // X86 ISA: Snooping an invalidation for monitor/mwait + if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { + cpu->wakeup(); + } lsq->recvTimingSnoopReq(pkt); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 96cd071e4..09b7db867 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -162,11 +162,13 @@ class FullO3CPU : public BaseO3CPU /** Pointer to LSQ. */ LSQ *lsq; + FullO3CPU *cpu; public: /** Default constructor. 
*/ DcachePort(LSQ *_lsq, FullO3CPU* _cpu) - : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq) + : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), + cpu(_cpu) { } protected: diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index d6dbb9292..e98da3ea7 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -272,6 +272,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt) DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(), pkt->cmdString()); + // X86 ISA: Snooping an invalidation for monitor/mwait + AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); + if(cpu->getAddrMonitor()->doMonitor(pkt)) { + cpu->wakeup(); + } + // if snoop invalidates, release any associated locks if (pkt->isInvalidate()) { DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n", @@ -288,6 +294,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt) DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(), pkt->cmdString()); + // X86 ISA: Snooping an invalidation for monitor/mwait + AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); + if(cpu->getAddrMonitor()->doMonitor(pkt)) { + cpu->wakeup(); + } + // if snoop invalidates, release any associated locks if (pkt->isInvalidate()) { DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n", diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 60ab53999..636e08899 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -347,6 +347,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr) void BaseSimpleCPU::wakeup() { + getAddrMonitor()->gotWakeup = true; + if (thread->status() != ThreadContext::Suspended) return; diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 8f38a33c8..523bc9776 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -462,6 +462,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext private: TheISA::PCState pred_pc; + + public: + // monitor/mwait 
functions + void armMonitor(Addr address) { BaseCPU::armMonitor(address); } + bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); } + void mwaitAtomic(ThreadContext *tc) + { return BaseCPU::mwaitAtomic(tc, thread->dtb); } + AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); } }; #endif // __CPU_SIMPLE_BASE_HH__ diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 84a2c09fd..5bfc9799d 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -58,6 +58,8 @@ #include "sim/full_system.hh" #include "sim/system.hh" +#include "debug/Mwait.hh" + using namespace std; using namespace TheISA; @@ -818,9 +820,21 @@ TimingSimpleCPU::updateCycleCounts() void TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) { + // X86 ISA: Snooping an invalidation for monitor/mwait + if(cpu->getAddrMonitor()->doMonitor(pkt)) { + cpu->wakeup(); + } TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask); } +void +TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt) +{ + // X86 ISA: Snooping an invalidation for monitor/mwait + if(cpu->getAddrMonitor()->doMonitor(pkt)) { + cpu->wakeup(); + } +} bool TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt) diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 84c8f7418..52eb6b1ba 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -228,11 +228,16 @@ class TimingSimpleCPU : public BaseSimpleCPU * a wakeup event on a cpu that is monitoring an address */ virtual void recvTimingSnoopReq(PacketPtr pkt); + virtual void recvFunctionalSnoop(PacketPtr pkt); virtual bool recvTimingResp(PacketPtr pkt); virtual void recvRetry(); + virtual bool isSnooping() const { + return true; + } + struct DTickEvent : public TickEvent { DTickEvent(TimingSimpleCPU *_cpu)