cpu: Fix LLSC atomic CPU wakeup
Writes to locked memory addresses (LLSC) did not wake up the locking CPU. This can lead to deadlocks on multi-core runs. In AtomicSimpleCPU, recvAtomicSnoop checked whether the incoming packet was an invalidation (isInvalidate) and only then handled a locked snoop. However, when running without caches (fast-forward configurations), writes are seen instead of invalidates. As a simple fix, handleLockedSnoop is now also called when the incoming snoop packet is a write.
This commit is contained in:
parent
c0d19391d4
commit
cabd4768c7
8 changed files with 3272 additions and 3292 deletions
|
@ -1617,7 +1617,9 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt)
|
||||||
* this action on snoops. */
|
* this action on snoops. */
|
||||||
|
|
||||||
/* THREAD */
|
/* THREAD */
|
||||||
TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
|
if (pkt->isInvalidate() || pkt->isWrite()) {
|
||||||
|
TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -438,10 +438,8 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
|
||||||
int load_idx = loadHead;
|
int load_idx = loadHead;
|
||||||
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
|
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
|
||||||
|
|
||||||
// Unlock the cpu-local monitor when the CPU sees a snoop to a locked
|
// Only Invalidate packet calls checkSnoop
|
||||||
// address. The CPU can speculatively execute a LL operation after a pending
|
assert(pkt->isInvalidate());
|
||||||
// SC operation in the pipeline and that can make the cache monitor the CPU
|
|
||||||
// is connected to valid while it really shouldn't be.
|
|
||||||
for (int x = 0; x < cpu->numContexts(); x++) {
|
for (int x = 0; x < cpu->numContexts(); x++) {
|
||||||
ThreadContext *tc = cpu->getContext(x);
|
ThreadContext *tc = cpu->getContext(x);
|
||||||
bool no_squash = cpu->thread[x]->noSquashFromTC;
|
bool no_squash = cpu->thread[x]->noSquashFromTC;
|
||||||
|
|
|
@ -292,7 +292,10 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
|
||||||
}
|
}
|
||||||
|
|
||||||
// if snoop invalidates, release any associated locks
|
// if snoop invalidates, release any associated locks
|
||||||
if (pkt->isInvalidate()) {
|
// When run without caches, Invalidation packets will not be received
|
||||||
|
// hence we must check if the incoming packets are writes and wakeup
|
||||||
|
// the processor accordingly
|
||||||
|
if (pkt->isInvalidate() || pkt->isWrite()) {
|
||||||
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
|
||||||
pkt->getAddr());
|
pkt->getAddr());
|
||||||
for (auto &t_info : cpu->threadInfo) {
|
for (auto &t_info : cpu->threadInfo) {
|
||||||
|
|
|
@ -876,8 +876,14 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &t_info : cpu->threadInfo) {
|
// Making it uniform across all CPUs:
|
||||||
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
|
// The CPUs need to be woken up only on an invalidation packet (when using caches)
|
||||||
|
// or on an incoming write packet (when not using caches)
|
||||||
|
// It is not necessary to wake up the processor on all incoming packets
|
||||||
|
if (pkt->isInvalidate() || pkt->isWrite()) {
|
||||||
|
for (auto &t_info : cpu->threadInfo) {
|
||||||
|
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -104,7 +104,6 @@ clk_domain=system.cpu_clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=true
|
|
||||||
hit_latency=2
|
hit_latency=2
|
||||||
is_read_only=false
|
is_read_only=false
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -146,7 +145,6 @@ clk_domain=system.cpu_clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=true
|
|
||||||
hit_latency=2
|
hit_latency=2
|
||||||
is_read_only=true
|
is_read_only=true
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -234,7 +232,6 @@ clk_domain=system.cpu_clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=true
|
|
||||||
hit_latency=2
|
hit_latency=2
|
||||||
is_read_only=false
|
is_read_only=false
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -276,7 +273,6 @@ clk_domain=system.cpu_clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=true
|
|
||||||
hit_latency=2
|
hit_latency=2
|
||||||
is_read_only=true
|
is_read_only=true
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -410,7 +406,6 @@ clk_domain=system.clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=false
|
|
||||||
hit_latency=50
|
hit_latency=50
|
||||||
is_read_only=false
|
is_read_only=false
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -447,7 +442,6 @@ clk_domain=system.cpu_clk_domain
|
||||||
clusivity=mostly_incl
|
clusivity=mostly_incl
|
||||||
demand_mshr_reserve=1
|
demand_mshr_reserve=1
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_snoops=true
|
|
||||||
hit_latency=20
|
hit_latency=20
|
||||||
is_read_only=false
|
is_read_only=false
|
||||||
max_miss_count=0
|
max_miss_count=0
|
||||||
|
@ -482,6 +476,7 @@ clk_domain=system.clk_domain
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_latency=4
|
forward_latency=4
|
||||||
frontend_latency=3
|
frontend_latency=3
|
||||||
|
point_of_coherency=true
|
||||||
response_latency=2
|
response_latency=2
|
||||||
snoop_filter=Null
|
snoop_filter=Null
|
||||||
snoop_response_latency=4
|
snoop_response_latency=4
|
||||||
|
@ -615,6 +610,7 @@ clk_domain=system.cpu_clk_domain
|
||||||
eventq_index=0
|
eventq_index=0
|
||||||
forward_latency=0
|
forward_latency=0
|
||||||
frontend_latency=1
|
frontend_latency=1
|
||||||
|
point_of_coherency=false
|
||||||
response_latency=1
|
response_latency=1
|
||||||
snoop_filter=system.toL2Bus.snoop_filter
|
snoop_filter=system.toL2Bus.snoop_filter
|
||||||
snoop_response_latency=1
|
snoop_response_latency=1
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
gem5 Simulator System. http://gem5.org
|
gem5 Simulator System. http://gem5.org
|
||||||
gem5 is copyrighted software; use the --copyright option for details.
|
gem5 is copyrighted software; use the --copyright option for details.
|
||||||
|
|
||||||
gem5 compiled Jan 21 2016 13:49:21
|
gem5 compiled Feb 29 2016 18:59:12
|
||||||
gem5 started Jan 21 2016 13:50:00
|
gem5 started Feb 29 2016 18:59:20
|
||||||
gem5 executing on zizzer, pid 33973
|
gem5 executing on redacted.arm.com, pid 18325
|
||||||
command line: build/ALPHA/gem5.opt -d build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual -re /z/atgutier/gem5/gem5-commit/tests/run.py build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
|
command line: build/ALPHA/gem5.opt -d build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual -re /z/atgutier/gem5/gem5-commit/tests/run.py build/ALPHA/tests/opt/quick/fs/10.linux-boot/alpha/linux/tsunami-simple-timing-dual
|
||||||
|
|
||||||
Global frequency set at 1000000000000 ticks per second
|
Global frequency set at 1000000000000 ticks per second
|
||||||
|
@ -11,4 +11,4 @@ info: kernel located at: /dist/m5/system/binaries/vmlinux
|
||||||
0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009
|
0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009
|
||||||
info: Entering event queue @ 0. Starting simulation...
|
info: Entering event queue @ 0. Starting simulation...
|
||||||
info: Launching CPU 1 @ 881785000
|
info: Launching CPU 1 @ 881785000
|
||||||
Exiting @ tick 1982594146000 because m5_exit instruction encountered
|
Exiting @ tick 1982592736000 because m5_exit instruction encountered
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue