Mem: Fix issue with a dirty block being lost when the entire block is transferred to a non-cache.
This change fixes the problem for all the cases we actively use. If you want to try more creative I/O device attachments (e.g., sharing an L2), this won't work. You would need another level of caching between the I/O device and the cache (which you actually need anyway with our current code to make sure writes propagate). This is required so that you can mark the cache in between as top level, so that it won't try to send ownership of a block to the I/O device. Asserts have been added that should catch any issues.
This commit is contained in:
parent
2f40b3b8ae
commit
a432d8e085
24 changed files with 54 additions and 4 deletions
|
@ -34,6 +34,7 @@ class L1Cache(BaseCache):
|
||||||
latency = '1ns'
|
latency = '1ns'
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
tgts_per_mshr = 5
|
tgts_per_mshr = 5
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
class L2Cache(BaseCache):
|
class L2Cache(BaseCache):
|
||||||
assoc = 8
|
assoc = 8
|
||||||
|
@ -49,6 +50,7 @@ class PageTableWalkerCache(BaseCache):
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
size = '1kB'
|
size = '1kB'
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
class IOCache(BaseCache):
|
class IOCache(BaseCache):
|
||||||
assoc = 8
|
assoc = 8
|
||||||
|
@ -58,3 +60,4 @@ class IOCache(BaseCache):
|
||||||
size = '1kB'
|
size = '1kB'
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
|
@ -112,6 +112,9 @@ DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
|
||||||
{
|
{
|
||||||
DPRINTF(Fetch, "Received timing\n");
|
DPRINTF(Fetch, "Received timing\n");
|
||||||
if (pkt->isResponse()) {
|
if (pkt->isResponse()) {
|
||||||
|
// We shouldn't ever get a block in ownership state
|
||||||
|
assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
|
||||||
|
|
||||||
fetch->processCacheCompletion(pkt);
|
fetch->processCacheCompletion(pkt);
|
||||||
}
|
}
|
||||||
//else Snooped a coherence request, just return
|
//else Snooped a coherence request, just return
|
||||||
|
|
|
@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt)
|
||||||
assert(pendingCount >= 0);
|
assert(pendingCount >= 0);
|
||||||
assert(state);
|
assert(state);
|
||||||
|
|
||||||
|
// We shouldn't ever get a block in ownership state
|
||||||
|
assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
|
||||||
|
|
||||||
state->numBytes += pkt->req->getSize();
|
state->numBytes += pkt->req->getSize();
|
||||||
assert(state->totBytes >= state->numBytes);
|
assert(state->totBytes >= state->numBytes);
|
||||||
if (state->totBytes == state->numBytes) {
|
if (state->totBytes == state->numBytes) {
|
||||||
|
|
1
src/mem/cache/BaseCache.py
vendored
1
src/mem/cache/BaseCache.py
vendored
|
@ -48,6 +48,7 @@ class BaseCache(MemObject):
|
||||||
size = Param.MemorySize("capacity in bytes")
|
size = Param.MemorySize("capacity in bytes")
|
||||||
forward_snoops = Param.Bool(True,
|
forward_snoops = Param.Bool(True,
|
||||||
"forward snoops from mem side to cpu side")
|
"forward snoops from mem side to cpu side")
|
||||||
|
is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)")
|
||||||
subblock_size = Param.Int(0,
|
subblock_size = Param.Int(0,
|
||||||
"Size of subblock in IIC used for compression")
|
"Size of subblock in IIC used for compression")
|
||||||
tgts_per_mshr = Param.Int("max number of accesses per MSHR")
|
tgts_per_mshr = Param.Int("max number of accesses per MSHR")
|
||||||
|
|
1
src/mem/cache/base.cc
vendored
1
src/mem/cache/base.cc
vendored
|
@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p)
|
||||||
hitLatency(p->latency),
|
hitLatency(p->latency),
|
||||||
numTarget(p->tgts_per_mshr),
|
numTarget(p->tgts_per_mshr),
|
||||||
forwardSnoops(p->forward_snoops),
|
forwardSnoops(p->forward_snoops),
|
||||||
|
isTopLevel(p->is_top_level),
|
||||||
blocked(0),
|
blocked(0),
|
||||||
noTargetMSHR(NULL),
|
noTargetMSHR(NULL),
|
||||||
missCount(p->max_miss_count),
|
missCount(p->max_miss_count),
|
||||||
|
|
5
src/mem/cache/base.hh
vendored
5
src/mem/cache/base.hh
vendored
|
@ -194,6 +194,11 @@ class BaseCache : public MemObject
|
||||||
/** Do we forward snoops from mem side port through to cpu side port? */
|
/** Do we forward snoops from mem side port through to cpu side port? */
|
||||||
bool forwardSnoops;
|
bool forwardSnoops;
|
||||||
|
|
||||||
|
/** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should
|
||||||
|
* never try to forward ownership and similar optimizations to the cpu
|
||||||
|
* side */
|
||||||
|
bool isTopLevel;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bit vector of the blocking reasons for the access path.
|
* Bit vector of the blocking reasons for the access path.
|
||||||
* @sa #BlockedCause
|
* @sa #BlockedCause
|
||||||
|
|
2
src/mem/cache/cache_impl.hh
vendored
2
src/mem/cache/cache_impl.hh
vendored
|
@ -216,7 +216,7 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk,
|
||||||
|
|
||||||
if (blk->isDirty()) {
|
if (blk->isDirty()) {
|
||||||
// special considerations if we're owner:
|
// special considerations if we're owner:
|
||||||
if (!deferred_response) {
|
if (!deferred_response && !isTopLevel) {
|
||||||
// if we are responding immediately and can
|
// if we are responding immediately and can
|
||||||
// signal that we're transferring ownership
|
// signal that we're transferring ownership
|
||||||
// along with exclusivity, do so
|
// along with exclusivity, do so
|
||||||
|
|
|
@ -37,8 +37,12 @@ class MyCache(BaseCache):
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
tgts_per_mshr = 5
|
tgts_per_mshr = 5
|
||||||
|
|
||||||
|
class MyL1Cache(MyCache):
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
cpu = InOrderCPU(cpu_id=0)
|
cpu = InOrderCPU(cpu_id=0)
|
||||||
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
|
cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
|
||||||
|
MyL1Cache(size = '256kB'),
|
||||||
MyCache(size = '2MB', latency='10ns'))
|
MyCache(size = '2MB', latency='10ns'))
|
||||||
|
|
||||||
cpu.clock = '2GHz'
|
cpu.clock = '2GHz'
|
||||||
|
|
|
@ -38,6 +38,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 12
|
mshrs = 12
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -39,6 +39,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -37,8 +37,12 @@ class MyCache(BaseCache):
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
tgts_per_mshr = 5
|
tgts_per_mshr = 5
|
||||||
|
|
||||||
|
class MyL1Cache(MyCache):
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
cpu = DerivO3CPU(cpu_id=0)
|
cpu = DerivO3CPU(cpu_id=0)
|
||||||
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
|
cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
|
||||||
|
MyL1Cache(size = '256kB'),
|
||||||
MyCache(size = '2MB'))
|
MyCache(size = '2MB'))
|
||||||
cpu.clock = '2GHz'
|
cpu.clock = '2GHz'
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -65,6 +66,7 @@ class PageTableWalkerCache(BaseCache):
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
size = '1kB'
|
size = '1kB'
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ---------------------
|
# ---------------------
|
||||||
# I/O Cache
|
# I/O Cache
|
||||||
|
@ -78,6 +80,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range = AddrRange(0, size=mem_size)
|
addr_range = AddrRange(0, size=mem_size)
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpu = AtomicSimpleCPU(cpu_id=0)
|
cpu = AtomicSimpleCPU(cpu_id=0)
|
||||||
|
|
|
@ -44,6 +44,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -40,6 +40,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -41,6 +41,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -38,6 +38,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -38,6 +38,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
|
|
@ -36,8 +36,12 @@ class MyCache(BaseCache):
|
||||||
mshrs = 10
|
mshrs = 10
|
||||||
tgts_per_mshr = 5
|
tgts_per_mshr = 5
|
||||||
|
|
||||||
|
class MyL1Cache(MyCache):
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
cpu = TimingSimpleCPU(cpu_id=0)
|
cpu = TimingSimpleCPU(cpu_id=0)
|
||||||
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
|
cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
|
||||||
|
MyL1Cache(size = '256kB'),
|
||||||
MyCache(size = '2MB', latency='10ns'))
|
MyCache(size = '2MB', latency='10ns'))
|
||||||
system = System(cpu = cpu,
|
system = System(cpu = cpu,
|
||||||
physmem = PhysicalMemory(),
|
physmem = PhysicalMemory(),
|
||||||
|
|
|
@ -41,6 +41,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -65,6 +66,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ]
|
cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ]
|
||||||
|
|
|
@ -41,6 +41,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -65,6 +66,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpu = DerivO3CPU(cpu_id=0)
|
cpu = DerivO3CPU(cpu_id=0)
|
||||||
|
|
|
@ -40,6 +40,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -64,6 +65,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]
|
cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]
|
||||||
|
|
|
@ -40,6 +40,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -64,6 +65,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpu = AtomicSimpleCPU(cpu_id=0)
|
cpu = AtomicSimpleCPU(cpu_id=0)
|
||||||
|
|
|
@ -40,6 +40,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -64,6 +65,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ]
|
cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ]
|
||||||
|
|
|
@ -41,6 +41,7 @@ class L1(BaseCache):
|
||||||
block_size = 64
|
block_size = 64
|
||||||
mshrs = 4
|
mshrs = 4
|
||||||
tgts_per_mshr = 8
|
tgts_per_mshr = 8
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# Base L2 Cache
|
# Base L2 Cache
|
||||||
|
@ -65,6 +66,7 @@ class IOCache(BaseCache):
|
||||||
tgts_per_mshr = 12
|
tgts_per_mshr = 12
|
||||||
addr_range=AddrRange(0, size='8GB')
|
addr_range=AddrRange(0, size='8GB')
|
||||||
forward_snoops = False
|
forward_snoops = False
|
||||||
|
is_top_level = True
|
||||||
|
|
||||||
#cpu
|
#cpu
|
||||||
cpu = TimingSimpleCPU(cpu_id=0)
|
cpu = TimingSimpleCPU(cpu_id=0)
|
||||||
|
|
Loading…
Reference in a new issue