cpu: Fix LLSC atomic CPU wakeup

Writes to locked memory addresses (LLSC) did not wake up the locking
CPU. This can lead to deadlocks on multi-core runs. In AtomicSimpleCPU,
recvAtomicSnoop was checking if the incoming packet was an invalidation
(isInvalidate) and only then handled a locked snoop. But, writes are
seen instead of invalidates when running without caches (fast-forward
configurations). As as simple fix, now handleLockedSnoop is also called
even if the incoming snoop packet are from writes.
This commit is contained in:
Krishnendra Nathella 2015-07-19 15:03:30 -05:00
parent c0d19391d4
commit cabd4768c7
8 changed files with 3272 additions and 3292 deletions

View file

@ -1617,7 +1617,9 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt)
* this action on snoops. */
/* THREAD */
TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
if (pkt->isInvalidate() || pkt->isWrite()) {
TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
}
}
}

View file

@ -438,10 +438,8 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
int load_idx = loadHead;
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
// Unlock the cpu-local monitor when the CPU sees a snoop to a locked
// address. The CPU can speculatively execute a LL operation after a pending
// SC operation in the pipeline and that can make the cache monitor the CPU
// is connected to valid while it really shouldn't be.
// Only Invalidate packet calls checkSnoop
assert(pkt->isInvalidate());
for (int x = 0; x < cpu->numContexts(); x++) {
ThreadContext *tc = cpu->getContext(x);
bool no_squash = cpu->thread[x]->noSquashFromTC;

View file

@ -292,7 +292,10 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
}
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
// When run without caches, Invalidation packets will not be received
// hence we must check if the incoming packets are writes and wakeup
// the processor accordingly
if (pkt->isInvalidate() || pkt->isWrite()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
pkt->getAddr());
for (auto &t_info : cpu->threadInfo) {

View file

@ -876,8 +876,14 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
}
}
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
// Making it uniform across all CPUs:
// The CPUs need to be woken up only on an invalidation packet (when using caches)
// or on an incoming write packet (when not using caches)
// It is not necessary to wake up the processor on all incoming packets
if (pkt->isInvalidate() || pkt->isWrite()) {
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
}
}
}

View file

@ -104,7 +104,6 @@ clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=true
hit_latency=2
is_read_only=false
max_miss_count=0
@ -146,7 +145,6 @@ clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=true
hit_latency=2
is_read_only=true
max_miss_count=0
@ -234,7 +232,6 @@ clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=true
hit_latency=2
is_read_only=false
max_miss_count=0
@ -276,7 +273,6 @@ clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=true
hit_latency=2
is_read_only=true
max_miss_count=0
@ -410,7 +406,6 @@ clk_domain=system.clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=false
hit_latency=50
is_read_only=false
max_miss_count=0
@ -447,7 +442,6 @@ clk_domain=system.cpu_clk_domain
clusivity=mostly_incl
demand_mshr_reserve=1
eventq_index=0
forward_snoops=true
hit_latency=20
is_read_only=false
max_miss_count=0
@ -482,6 +476,7 @@ clk_domain=system.clk_domain
eventq_index=0
forward_latency=4
frontend_latency=3
point_of_coherency=true
response_latency=2
snoop_filter=Null
snoop_response_latency=4
@ -615,6 +610,7 @@ clk_domain=system.cpu_clk_domain
eventq_index=0
forward_latency=0
frontend_latency=1
point_of_coherency=false
response_latency=1
snoop_filter=system.toL2Bus.snoop_filter
snoop_response_latency=1

View file

@ -1,9 +1,9 @@
gem5 Simulator System. http://gem5.org
gem5 is copyrighted software; use the --copyright option for details.
gem5 compiled Jan 21 2016 13:49:21
gem5 started Jan 21 2016 13:50:00
gem5 executing on zizzer, pid 33973
gem5 compiled Feb 29 2016 18:59:12
gem5 started Feb 29 2016 18:59:20
gem5 executing on redacted.arm.com, pid 18325
command line: build/ALPHA/gem5.opt -d build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual -re /z/atgutier/gem5/gem5-commit/tests/run.py build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
Global frequency set at 1000000000000 ticks per second
@ -11,4 +11,4 @@ info: kernel located at: /dist/m5/system/binaries/vmlinux
0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009
info: Entering event queue @ 0. Starting simulation...
info: Launching CPU 1 @ 881785000
Exiting @ tick 1982594146000 because m5_exit instruction encountered
Exiting @ tick 1982592736000 because m5_exit instruction encountered