x86 isa: This patch attempts an implementation of mwait.
Mwait works as follows: 1. A cpu monitors an address of interest (monitor instruction) 2. A cpu calls mwait - this loads the cache line into that cpu's cache. 3. The cpu goes to sleep. 4. When another processor requests write permission for the line, it is evicted from the sleeping cpu's cache. This eviction is forwarded to the sleeping cpu, which then wakes up. Committed by: Nilay Vaish <nilay@cs.wisc.edu>
This commit is contained in:
parent
3947f88d0f
commit
bf80734b2c
26 changed files with 381 additions and 16 deletions
|
@ -34,6 +34,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
|
|
||||||
l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
|
l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
|
||||||
Icache = l0i_cache, Dcache = l0d_cache,
|
Icache = l0i_cache, Dcache = l0d_cache,
|
||||||
send_evictions = (options.cpu_type == "detailed"),
|
send_evictions = send_evicts(options),
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
ruby_system = ruby_system)
|
ruby_system = ruby_system)
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
L1Icache = l1i_cache,
|
L1Icache = l1i_cache,
|
||||||
L1Dcache = l1d_cache,
|
L1Dcache = l1d_cache,
|
||||||
l2_select_num_bits = l2_bits,
|
l2_select_num_bits = l2_bits,
|
||||||
send_evictions = (
|
send_evictions = send_evicts(options),
|
||||||
options.cpu_type == "detailed"),
|
|
||||||
prefetcher = prefetcher,
|
prefetcher = prefetcher,
|
||||||
ruby_system = ruby_system,
|
ruby_system = ruby_system,
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
|
|
|
@ -32,6 +32,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the cache latency is only used by the sequencer on fast path hits
|
# Note: the cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -79,8 +80,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
#
|
#
|
||||||
l1_cntrl = L1Cache_Controller(version = i,
|
l1_cntrl = L1Cache_Controller(version = i,
|
||||||
cacheMemory = cache,
|
cacheMemory = cache,
|
||||||
send_evictions = (
|
send_evictions = send_evicts(options),
|
||||||
options.cpu_type == "detailed"),
|
|
||||||
transitions_per_cycle = options.ports,
|
transitions_per_cycle = options.ports,
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
ruby_system = ruby_system)
|
ruby_system = ruby_system)
|
||||||
|
|
|
@ -32,6 +32,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
L1Icache = l1i_cache,
|
L1Icache = l1i_cache,
|
||||||
L1Dcache = l1d_cache,
|
L1Dcache = l1d_cache,
|
||||||
l2_select_num_bits = l2_bits,
|
l2_select_num_bits = l2_bits,
|
||||||
send_evictions = (
|
send_evictions = send_evicts(options),
|
||||||
options.cpu_type == "detailed"),
|
|
||||||
transitions_per_cycle = options.ports,
|
transitions_per_cycle = options.ports,
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
ruby_system = ruby_system)
|
ruby_system = ruby_system)
|
||||||
|
|
|
@ -32,6 +32,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
not options.disable_dyn_timeouts,
|
not options.disable_dyn_timeouts,
|
||||||
no_mig_atomic = not \
|
no_mig_atomic = not \
|
||||||
options.allow_atomic_migration,
|
options.allow_atomic_migration,
|
||||||
send_evictions = (
|
send_evictions = send_evicts(options),
|
||||||
options.cpu_type == "detailed"),
|
|
||||||
transitions_per_cycle = options.ports,
|
transitions_per_cycle = options.ports,
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
ruby_system = ruby_system)
|
ruby_system = ruby_system)
|
||||||
|
|
|
@ -32,6 +32,7 @@ import m5
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
from m5.defines import buildEnv
|
from m5.defines import buildEnv
|
||||||
from Ruby import create_topology
|
from Ruby import create_topology
|
||||||
|
from Ruby import send_evicts
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
|
||||||
|
@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
|
||||||
L2cache = l2_cache,
|
L2cache = l2_cache,
|
||||||
no_mig_atomic = not \
|
no_mig_atomic = not \
|
||||||
options.allow_atomic_migration,
|
options.allow_atomic_migration,
|
||||||
send_evictions = (
|
send_evictions = send_evicts(options),
|
||||||
options.cpu_type == "detailed"),
|
|
||||||
transitions_per_cycle = options.ports,
|
transitions_per_cycle = options.ports,
|
||||||
clk_domain=system.cpu[i].clk_domain,
|
clk_domain=system.cpu[i].clk_domain,
|
||||||
ruby_system = ruby_system)
|
ruby_system = ruby_system)
|
||||||
|
|
|
@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []):
|
||||||
ruby.num_of_sequencers = len(cpu_sequencers)
|
ruby.num_of_sequencers = len(cpu_sequencers)
|
||||||
ruby.random_seed = options.random_seed
|
ruby.random_seed = options.random_seed
|
||||||
|
|
||||||
|
def send_evicts(options):
    """Tell the cache controllers whether to forward evictions to the CPU.

    Two scenarios currently warrant forwarding:
      1. The O3 ("detailed") CPU model must keep its LSQ coherent with the
         caches.
      2. The x86 mwait instruction is built on top of coherence
         invalidations.

    Returns True if either scenario applies, False otherwise.
    """
    needs_lsq_coherence = options.cpu_type == "detailed"
    # The ISA is only consulted when the CPU type alone does not decide it.
    return needs_lsq_coherence or buildEnv['TARGET_ISA'] == 'x86'
|
||||||
|
|
||||||
# Create a backing copy of physical memory in case required
|
# Create a backing copy of physical memory in case required
|
||||||
if options.access_backing_store:
|
if options.access_backing_store:
|
||||||
ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
|
ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
|
||||||
|
|
|
@ -71,8 +71,20 @@
|
||||||
}
|
}
|
||||||
0x1: decode MODRM_MOD {
|
0x1: decode MODRM_MOD {
|
||||||
0x3: decode MODRM_RM {
|
0x3: decode MODRM_RM {
|
||||||
0x0: monitor();
|
0x0: MonitorInst::monitor({{
|
||||||
0x1: mwait();
|
xc->armMonitor(Rax);
|
||||||
|
}});
|
||||||
|
0x1: MwaitInst::mwait({{
|
||||||
|
uint64_t m = 0; //mem
|
||||||
|
unsigned s = 0x8; //size
|
||||||
|
unsigned f = 0; //flags
|
||||||
|
readMemAtomic(xc, traceData,
|
||||||
|
xc->getAddrMonitor()->vAddr,
|
||||||
|
m, s, f);
|
||||||
|
xc->mwaitAtomic(xc->tcBase());
|
||||||
|
MicroHalt hltObj(machInst, mnemonic, 0x0);
|
||||||
|
hltObj.execute(xc, traceData);
|
||||||
|
}});
|
||||||
default: Inst::UD2();
|
default: Inst::UD2();
|
||||||
}
|
}
|
||||||
default: sidt_Ms();
|
default: sidt_Ms();
|
||||||
|
|
|
@ -45,6 +45,9 @@
|
||||||
//Include a format to generate a CPUID instruction.
|
//Include a format to generate a CPUID instruction.
|
||||||
##include "cpuid.isa"
|
##include "cpuid.isa"
|
||||||
|
|
||||||
|
//Include a format to generate the monitor/mwait instructions.
|
||||||
|
##include "monitor_mwait.isa"
|
||||||
|
|
||||||
//Include the "unknown" format
|
//Include the "unknown" format
|
||||||
##include "unknown.isa"
|
##include "unknown.isa"
|
||||||
|
|
||||||
|
|
131
src/arch/x86/isa/formats/monitor_mwait.isa
Normal file
131
src/arch/x86/isa/formats/monitor_mwait.isa
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
// Copyright (c) AMD
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// Authors: Marc Orr
|
||||||
|
|
||||||
|
// Monitor Instruction
|
||||||
|
|
||||||
|
output header {{
    /**
     * Base class for the x86 MONITOR instruction. It adds nothing to
     * X86StaticInst beyond its own disassembly routine.
     */
    class MonitorInst : public X86ISA::X86StaticInst
    {
      public:
        static const RegIndex foldOBit = 0;

        /// Passes the mnemonic, machine instruction and op class
        /// straight through to X86StaticInst.
        MonitorInst(const char *_mnemonic, ExtMachInst _machInst,
                    OpClass __opClass)
            : X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
        {
        }

        /// Produce the textual form of the instruction.
        std::string generateDisassembly(Addr pc,
                                        const SymbolTable *symtab) const;
    };
}};
|
||||||
|
|
||||||
|
output decoder {{
    /**
     * Build the disassembly string for MONITOR: the mnemonic, a space,
     * then source register 0 printed at the instruction's operand size.
     * Neither PC nor symtab is consulted.
     */
    std::string
    MonitorInst::generateDisassembly(Addr PC,
                                     const SymbolTable *symtab) const
    {
        std::stringstream text;

        printMnemonic(text, mnemonic);
        ccprintf(text, " ");
        printReg(text, _srcRegIdx[0], machInst.opSize);

        return text.str();
    }
}};
|
||||||
|
|
||||||
|
def format MonitorInst(code, *opt_flags) {{
    # Instruction parameters: the generated class uses MonitorInst as its
    # base class and the supplied code snippet as its body.
    monitor_params = InstObjParams(name, Name, 'MonitorInst', code,
                                   opt_flags)

    # Every output piece comes from the stock Basic* templates.
    header_output = BasicDeclare.subst(monitor_params)
    decoder_output = BasicConstructor.subst(monitor_params)
    decode_block = BasicDecode.subst(monitor_params)
    exec_output = BasicExecute.subst(monitor_params)
}};
|
||||||
|
|
||||||
|
|
||||||
|
// Mwait instruction
|
||||||
|
|
||||||
|
// Declarations for execute() methods.
|
||||||
|
def template MwaitExecDeclare {{
    // Atomic-style execution entry point.
    Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;

    // Split execution: initiateAcc() issues the memory access and
    // completeAcc() runs once the response packet is available.
    Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
                      Trace::InstRecord *) const;
}};
|
||||||
|
|
||||||
|
def template MwaitDeclare {{
    // Generated instruction class: a constructor taking the machine
    // instruction plus the execute/initiateAcc/completeAcc declarations
    // pulled in from MwaitExecDeclare.
    class %(class_name)s : public %(base_class)s
    {
      public:
        // Constructor.
        %(class_name)s(ExtMachInst machInst);

        %(MwaitExecDeclare)s
    };
}};
|
||||||
|
|
||||||
|
def template MwaitInitiateAcc {{
    // Issue a timing read of 8 bytes at the virtual address held by the
    // CPU's address monitor. The value returned by readMemTiming is not
    // inspected; this method always reports NoFault.
    Fault
    %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc,
                                Trace::InstRecord * traceData) const
    {
        uint64_t dataWord = 0;      // destination for the loaded value
        unsigned dataSize = 0x8;    // access size in bytes
        unsigned memFlags = 0;      // no special request flags

        readMemTiming(xc, traceData, xc->getAddrMonitor()->vAddr,
                      dataWord, dataSize, memFlags);

        return NoFault;
    }
}};
|
||||||
|
|
||||||
|
def template MwaitCompleteAcc {{
    // Called when the read issued by initiateAcc() returns. The execution
    // context's mwait() decides whether the CPU should actually go to
    // sleep; when it says yes, a halt micro-op is executed in place.
    Fault
    %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc,
                                Trace::InstRecord *traceData) const
    {
        MicroHalt haltOp(machInst, mnemonic, 0x0);

        const bool goToSleep = xc->mwait(pkt);
        if (goToSleep)
            haltOp.execute(xc, traceData);

        return NoFault;
    }
}};
|
||||||
|
|
||||||
|
output header {{
    /**
     * Base class for the x86 MWAIT instruction. The constructor marks the
     * instruction as a memory reference and as a load.
     */
    class MwaitInst : public X86ISA::X86StaticInst
    {
      public:
        static const RegIndex foldOBit = 0;

        /// Passes its arguments through to X86StaticInst, then sets the
        /// IsMemRef and IsLoad instruction flags.
        MwaitInst(const char *_mnemonic, ExtMachInst _machInst,
                  OpClass __opClass)
            : X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
        {
            flags[IsMemRef] = 1;
            flags[IsLoad] = 1;
        }

        /// Produce the textual form of the instruction.
        std::string generateDisassembly(Addr pc,
                                        const SymbolTable *symtab) const;
    };
}};
|
||||||
|
|
||||||
|
output decoder {{
    /**
     * Build the disassembly string for MWAIT: the mnemonic, a space,
     * then source register 0 printed at the instruction's operand size.
     * Neither PC nor symtab is consulted.
     *
     * NOTE(review): the mwait code snippet in the decoder does not name
     * a register operand -- confirm _srcRegIdx[0] is populated for this
     * instruction before relying on the printed register.
     */
    std::string
    MwaitInst::generateDisassembly(Addr PC,
                                   const SymbolTable *symtab) const
    {
        std::stringstream text;

        printMnemonic(text, mnemonic);
        ccprintf(text, " ");
        printReg(text, _srcRegIdx[0], machInst.opSize);

        return text.str();
    }
}};
|
||||||
|
|
||||||
|
def format MwaitInst(code, *opt_flags) {{
    # Instruction parameters: the generated class uses MwaitInst as its
    # base class and the supplied code snippet as its execute() body.
    mwait_params = InstObjParams(name, Name, 'MwaitInst', code, opt_flags)

    # The declaration comes from MwaitDeclare so the class also declares
    # initiateAcc/completeAcc; constructor and decode use the Basic* ones.
    header_output = MwaitDeclare.subst(mwait_params)
    decoder_output = BasicConstructor.subst(mwait_params)
    decode_block = BasicDecode.subst(mwait_params)

    # execute() first, then the two timing-mode halves.
    exec_output = (BasicExecute.subst(mwait_params) +
                   MwaitInitiateAcc.subst(mwait_params) +
                   MwaitCompleteAcc.subst(mwait_params))
}};
|
||||||
|
|
|
@ -102,6 +102,7 @@ DebugFlag('IntrControl')
|
||||||
DebugFlag('O3PipeView')
|
DebugFlag('O3PipeView')
|
||||||
DebugFlag('PCEvent')
|
DebugFlag('PCEvent')
|
||||||
DebugFlag('Quiesce')
|
DebugFlag('Quiesce')
|
||||||
|
DebugFlag('Mwait')
|
||||||
|
|
||||||
CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
|
CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
|
||||||
'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
|
'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
|
||||||
|
|
|
@ -55,12 +55,14 @@
|
||||||
#include "base/misc.hh"
|
#include "base/misc.hh"
|
||||||
#include "base/output.hh"
|
#include "base/output.hh"
|
||||||
#include "base/trace.hh"
|
#include "base/trace.hh"
|
||||||
#include "cpu/base.hh"
|
|
||||||
#include "cpu/checker/cpu.hh"
|
#include "cpu/checker/cpu.hh"
|
||||||
|
#include "cpu/base.hh"
|
||||||
#include "cpu/cpuevent.hh"
|
#include "cpu/cpuevent.hh"
|
||||||
#include "cpu/profile.hh"
|
#include "cpu/profile.hh"
|
||||||
#include "cpu/thread_context.hh"
|
#include "cpu/thread_context.hh"
|
||||||
|
#include "debug/Mwait.hh"
|
||||||
#include "debug/SyscallVerbose.hh"
|
#include "debug/SyscallVerbose.hh"
|
||||||
|
#include "mem/page_table.hh"
|
||||||
#include "params/BaseCPU.hh"
|
#include "params/BaseCPU.hh"
|
||||||
#include "sim/full_system.hh"
|
#include "sim/full_system.hh"
|
||||||
#include "sim/process.hh"
|
#include "sim/process.hh"
|
||||||
|
@ -123,7 +125,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
|
||||||
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
|
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
|
||||||
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
|
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
|
||||||
interrupts(p->interrupts), profileEvent(NULL),
|
interrupts(p->interrupts), profileEvent(NULL),
|
||||||
numThreads(p->numThreads), system(p->system)
|
numThreads(p->numThreads), system(p->system),
|
||||||
|
addressMonitor()
|
||||||
{
|
{
|
||||||
// if Python did not provide a valid ID, do it here
|
// if Python did not provide a valid ID, do it here
|
||||||
if (_cpuId == -1 ) {
|
if (_cpuId == -1 ) {
|
||||||
|
@ -260,6 +263,63 @@ BaseCPU::~BaseCPU()
|
||||||
delete[] comInstEventQueue;
|
delete[] comInstEventQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BaseCPU::armMonitor(Addr address)
|
||||||
|
{
|
||||||
|
addressMonitor.armed = true;
|
||||||
|
addressMonitor.vAddr = address;
|
||||||
|
addressMonitor.pAddr = 0x0;
|
||||||
|
DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
BaseCPU::mwait(PacketPtr pkt)
|
||||||
|
{
|
||||||
|
if(addressMonitor.gotWakeup == false) {
|
||||||
|
int block_size = cacheLineSize();
|
||||||
|
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||||
|
|
||||||
|
assert(pkt->req->hasPaddr());
|
||||||
|
addressMonitor.pAddr = pkt->getAddr() & mask;
|
||||||
|
addressMonitor.waiting = true;
|
||||||
|
|
||||||
|
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
|
||||||
|
addressMonitor.vAddr, addressMonitor.pAddr);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
addressMonitor.gotWakeup = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
|
||||||
|
{
|
||||||
|
Request req;
|
||||||
|
Addr addr = addressMonitor.vAddr;
|
||||||
|
int block_size = cacheLineSize();
|
||||||
|
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||||
|
int size = block_size;
|
||||||
|
|
||||||
|
//The address of the next line if it crosses a cache line boundary.
|
||||||
|
Addr secondAddr = roundDown(addr + size - 1, block_size);
|
||||||
|
|
||||||
|
if (secondAddr > addr)
|
||||||
|
size = secondAddr - addr;
|
||||||
|
|
||||||
|
req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
|
||||||
|
|
||||||
|
// translate to physical address
|
||||||
|
Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
|
||||||
|
assert(fault == NoFault);
|
||||||
|
|
||||||
|
addressMonitor.pAddr = req.getPaddr() & mask;
|
||||||
|
addressMonitor.waiting = true;
|
||||||
|
|
||||||
|
DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
|
||||||
|
addressMonitor.vAddr, addressMonitor.pAddr);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BaseCPU::init()
|
BaseCPU::init()
|
||||||
{
|
{
|
||||||
|
@ -618,6 +678,25 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
|
||||||
comInstEventQueue[tid]->schedule(event, now + insts);
|
comInstEventQueue[tid]->schedule(event, now + insts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Construct a monitor that is disarmed, not waiting, and has no pending
 * wakeup.
 *
 * The address and value fields are zeroed as well. Previously vAddr,
 * pAddr and val were left indeterminate, yet the mwait instruction reads
 * vAddr through getAddrMonitor() even if no monitor instruction has
 * armed the monitor first.
 *
 * Initializers are listed in member declaration order.
 */
AddressMonitor::AddressMonitor()
    : armed(false),
      vAddr(0),
      pAddr(0),
      val(0),
      waiting(false),
      gotWakeup(false)
{
}
|
||||||
|
|
||||||
|
/**
 * Check a snooped packet against the monitored physical address.
 *
 * A match is only possible while the monitor is both armed and waiting.
 * On a match the waiting state is cleared.
 *
 * NOTE(review): pAddr is stored cache-line aligned by BaseCPU::mwait()
 * and mwaitAtomic(), while the packet address is compared as-is. This
 * presumably relies on snoop addresses being line aligned -- confirm.
 *
 * @param pkt Snooped packet; its request must carry a physical address.
 * @return true if the packet hit the monitored address and the CPU
 *         should be woken up, false otherwise.
 */
bool
AddressMonitor::doMonitor(PacketPtr pkt)
{
    assert(pkt->req->hasPaddr());

    const bool hit = armed && waiting && pAddr == pkt->getAddr();
    if (!hit)
        return false;

    DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
            pkt->getAddr());
    waiting = false;
    return true;
}
|
||||||
|
|
||||||
void
|
void
|
||||||
BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
|
BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
|
||||||
{
|
{
|
||||||
|
|
|
@ -64,11 +64,26 @@
|
||||||
#include "sim/insttracer.hh"
|
#include "sim/insttracer.hh"
|
||||||
#include "sim/probe/pmu.hh"
|
#include "sim/probe/pmu.hh"
|
||||||
#include "sim/system.hh"
|
#include "sim/system.hh"
|
||||||
|
#include "debug/Mwait.hh"
|
||||||
|
|
||||||
|
class BaseCPU;
|
||||||
struct BaseCPUParams;
|
struct BaseCPUParams;
|
||||||
class CheckerCPU;
|
class CheckerCPU;
|
||||||
class ThreadContext;
|
class ThreadContext;
|
||||||
|
|
||||||
|
struct AddressMonitor
|
||||||
|
{
|
||||||
|
AddressMonitor();
|
||||||
|
bool doMonitor(PacketPtr pkt);
|
||||||
|
|
||||||
|
bool armed;
|
||||||
|
Addr vAddr;
|
||||||
|
Addr pAddr;
|
||||||
|
uint64_t val;
|
||||||
|
bool waiting; // 0=normal, 1=mwaiting
|
||||||
|
bool gotWakeup;
|
||||||
|
};
|
||||||
|
|
||||||
class CPUProgressEvent : public Event
|
class CPUProgressEvent : public Event
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
@ -536,6 +551,16 @@ class BaseCPU : public MemObject
|
||||||
Stats::Scalar numCycles;
|
Stats::Scalar numCycles;
|
||||||
Stats::Scalar numWorkItemsStarted;
|
Stats::Scalar numWorkItemsStarted;
|
||||||
Stats::Scalar numWorkItemsCompleted;
|
Stats::Scalar numWorkItemsCompleted;
|
||||||
|
|
||||||
|
private:
|
||||||
|
AddressMonitor addressMonitor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void armMonitor(Addr address);
|
||||||
|
bool mwait(PacketPtr pkt);
|
||||||
|
void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
|
||||||
|
AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
|
||||||
|
void atomicNotify(Addr address);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // THE_ISA == NULL_ISA
|
#endif // THE_ISA == NULL_ISA
|
||||||
|
|
|
@ -853,6 +853,14 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||||
/** Sets the number of consecutive store conditional failures. */
|
/** Sets the number of consecutive store conditional failures. */
|
||||||
void setStCondFailures(unsigned int sc_failures)
|
void setStCondFailures(unsigned int sc_failures)
|
||||||
{ thread->storeCondFailures = sc_failures; }
|
{ thread->storeCondFailures = sc_failures; }
|
||||||
|
|
||||||
|
public:
|
||||||
|
// monitor/mwait functions
|
||||||
|
void armMonitor(Addr address) { cpu->armMonitor(address); }
|
||||||
|
bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
|
||||||
|
void mwaitAtomic(ThreadContext *tc)
|
||||||
|
{ return cpu->mwaitAtomic(tc, cpu->dtb); }
|
||||||
|
AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
|
|
@ -349,6 +349,13 @@ class CheckerCPU : public BaseCPU, public ExecContext
|
||||||
this->dtb->demapPage(vaddr, asn);
|
this->dtb->demapPage(vaddr, asn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// monitor/mwait functions
|
||||||
|
virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
|
||||||
|
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
|
||||||
|
void mwaitAtomic(ThreadContext *tc)
|
||||||
|
{ return BaseCPU::mwaitAtomic(tc, thread->dtb); }
|
||||||
|
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
|
||||||
|
|
||||||
void demapInstPage(Addr vaddr, uint64_t asn)
|
void demapInstPage(Addr vaddr, uint64_t asn)
|
||||||
{
|
{
|
||||||
this->itb->demapPage(vaddr, asn);
|
this->itb->demapPage(vaddr, asn);
|
||||||
|
|
|
@ -47,6 +47,7 @@
|
||||||
#include "arch/registers.hh"
|
#include "arch/registers.hh"
|
||||||
#include "base/types.hh"
|
#include "base/types.hh"
|
||||||
#include "config/the_isa.hh"
|
#include "config/the_isa.hh"
|
||||||
|
#include "cpu/base.hh"
|
||||||
#include "cpu/static_inst_fwd.hh"
|
#include "cpu/static_inst_fwd.hh"
|
||||||
#include "cpu/translation.hh"
|
#include "cpu/translation.hh"
|
||||||
|
|
||||||
|
@ -243,6 +244,10 @@ class ExecContext {
|
||||||
* Invalidate a page in the DTLB <i>and</i> ITLB.
|
* Invalidate a page in the DTLB <i>and</i> ITLB.
|
||||||
*/
|
*/
|
||||||
virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
|
virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
|
||||||
|
virtual void armMonitor(Addr address) = 0;
|
||||||
|
virtual bool mwait(PacketPtr pkt) = 0;
|
||||||
|
virtual void mwaitAtomic(ThreadContext *tc) = 0;
|
||||||
|
virtual AddressMonitor *getAddrMonitor() = 0;
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
|
|
@ -602,3 +602,25 @@ InOrderDynInst::dump(std::string &outstring)
|
||||||
|
|
||||||
outstring = s.str();
|
outstring = s.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
InOrderDynInst::armMonitor(Addr address) {
|
||||||
|
cpu->armMonitor(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
InOrderDynInst::mwait(PacketPtr pkt) {
|
||||||
|
return cpu->mwait(pkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
InOrderDynInst::mwaitAtomic(ThreadContext *tc)
|
||||||
|
{
|
||||||
|
return cpu->mwaitAtomic(tc, cpu->getDTBPtr());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the owning CPU's address monitor. */
AddressMonitor *
InOrderDynInst::getAddrMonitor()
{
    AddressMonitor *monitor = cpu->getCpuAddrMonitor();
    return monitor;
}
|
||||||
|
|
|
@ -1077,6 +1077,13 @@ class InOrderDynInst : public ExecContext, public RefCounted
|
||||||
void demapPage(Addr vaddr, uint64_t asn) {
|
void demapPage(Addr vaddr, uint64_t asn) {
|
||||||
panic("demapPage unimplemented");
|
panic("demapPage unimplemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// monitor/mwait functions
|
||||||
|
void armMonitor(Addr address);
|
||||||
|
bool mwait(PacketPtr pkt);
|
||||||
|
void mwaitAtomic(ThreadContext *tc);
|
||||||
|
AddressMonitor *getAddrMonitor();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -340,6 +340,15 @@ class ExecContext : public ::ExecContext
|
||||||
- TheISA::Misc_Reg_Base, val);
|
- TheISA::Misc_Reg_Base, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// monitor/mwait functions
|
||||||
|
void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
|
||||||
|
bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
|
||||||
|
void mwaitAtomic(ThreadContext *tc)
|
||||||
|
{ return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
|
||||||
|
AddressMonitor *getAddrMonitor()
|
||||||
|
{ return getCpuPtr()->getCpuAddrMonitor(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,6 +117,10 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
||||||
{
|
{
|
||||||
|
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||||
|
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
|
||||||
|
cpu->wakeup();
|
||||||
|
}
|
||||||
lsq->recvTimingSnoopReq(pkt);
|
lsq->recvTimingSnoopReq(pkt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -162,11 +162,13 @@ class FullO3CPU : public BaseO3CPU
|
||||||
|
|
||||||
/** Pointer to LSQ. */
|
/** Pointer to LSQ. */
|
||||||
LSQ<Impl> *lsq;
|
LSQ<Impl> *lsq;
|
||||||
|
FullO3CPU<Impl> *cpu;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** Default constructor. */
|
/** Default constructor. */
|
||||||
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
|
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
|
||||||
: MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq)
|
: MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
|
||||||
|
cpu(_cpu)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -272,6 +272,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
|
||||||
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
|
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
|
||||||
pkt->cmdString());
|
pkt->cmdString());
|
||||||
|
|
||||||
|
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||||
|
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
|
||||||
|
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
|
||||||
|
cpu->wakeup();
|
||||||
|
}
|
||||||
|
|
||||||
// if snoop invalidates, release any associated locks
|
// if snoop invalidates, release any associated locks
|
||||||
if (pkt->isInvalidate()) {
|
if (pkt->isInvalidate()) {
|
||||||
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
||||||
|
@ -288,6 +294,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
|
||||||
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
|
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
|
||||||
pkt->cmdString());
|
pkt->cmdString());
|
||||||
|
|
||||||
|
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||||
|
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
|
||||||
|
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
|
||||||
|
cpu->wakeup();
|
||||||
|
}
|
||||||
|
|
||||||
// if snoop invalidates, release any associated locks
|
// if snoop invalidates, release any associated locks
|
||||||
if (pkt->isInvalidate()) {
|
if (pkt->isInvalidate()) {
|
||||||
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
||||||
|
|
|
@ -347,6 +347,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
|
||||||
void
|
void
|
||||||
BaseSimpleCPU::wakeup()
|
BaseSimpleCPU::wakeup()
|
||||||
{
|
{
|
||||||
|
getAddrMonitor()->gotWakeup = true;
|
||||||
|
|
||||||
if (thread->status() != ThreadContext::Suspended)
|
if (thread->status() != ThreadContext::Suspended)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
|
@ -462,6 +462,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TheISA::PCState pred_pc;
|
TheISA::PCState pred_pc;
|
||||||
|
|
||||||
|
public:
|
||||||
|
// monitor/mwait functions
|
||||||
|
void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
|
||||||
|
bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
|
||||||
|
void mwaitAtomic(ThreadContext *tc)
|
||||||
|
{ return BaseCPU::mwaitAtomic(tc, thread->dtb); }
|
||||||
|
AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __CPU_SIMPLE_BASE_HH__
|
#endif // __CPU_SIMPLE_BASE_HH__
|
||||||
|
|
|
@ -58,6 +58,8 @@
|
||||||
#include "sim/full_system.hh"
|
#include "sim/full_system.hh"
|
||||||
#include "sim/system.hh"
|
#include "sim/system.hh"
|
||||||
|
|
||||||
|
#include "debug/Mwait.hh"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace TheISA;
|
using namespace TheISA;
|
||||||
|
|
||||||
|
@ -818,9 +820,21 @@ TimingSimpleCPU::updateCycleCounts()
|
||||||
void
|
void
|
||||||
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
||||||
{
|
{
|
||||||
|
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||||
|
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
|
||||||
|
cpu->wakeup();
|
||||||
|
}
|
||||||
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
|
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
|
||||||
|
{
|
||||||
|
// X86 ISA: Snooping an invalidation for monitor/mwait
|
||||||
|
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
|
||||||
|
cpu->wakeup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
|
TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
|
||||||
|
|
|
@ -228,11 +228,16 @@ class TimingSimpleCPU : public BaseSimpleCPU
|
||||||
* a wakeup event on a cpu that is monitoring an address
|
* a wakeup event on a cpu that is monitoring an address
|
||||||
*/
|
*/
|
||||||
virtual void recvTimingSnoopReq(PacketPtr pkt);
|
virtual void recvTimingSnoopReq(PacketPtr pkt);
|
||||||
|
virtual void recvFunctionalSnoop(PacketPtr pkt);
|
||||||
|
|
||||||
virtual bool recvTimingResp(PacketPtr pkt);
|
virtual bool recvTimingResp(PacketPtr pkt);
|
||||||
|
|
||||||
virtual void recvRetry();
|
virtual void recvRetry();
|
||||||
|
|
||||||
|
virtual bool isSnooping() const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
struct DTickEvent : public TickEvent
|
struct DTickEvent : public TickEvent
|
||||||
{
|
{
|
||||||
DTickEvent(TimingSimpleCPU *_cpu)
|
DTickEvent(TimingSimpleCPU *_cpu)
|
||||||
|
|
Loading…
Reference in a new issue