Mem: Fix issue with a dirty block being lost when the entire block is transferred to a non-cache.

This change fixes the problem for all the cases we actively use. If you want to try
more creative I/O device attachments (e.g., sharing an L2), this won't work. You
would need another level of caching between the I/O device and the shared cache
(which you actually need anyway with our current code to make sure writes
propagate). This is required so that you can mark the cache in between as
top-level, so that it won't try to send ownership of a block to the I/O device.
Asserts have been added to the fetch and DMA ports that should catch any case
where a block is delivered to them in ownership state.
This commit is contained in:
Ali Saidi 2011-03-17 19:20:19 -05:00
parent 2f40b3b8ae
commit a432d8e085
24 changed files with 54 additions and 4 deletions

View file

@ -34,6 +34,7 @@ class L1Cache(BaseCache):
latency = '1ns' latency = '1ns'
mshrs = 10 mshrs = 10
tgts_per_mshr = 5 tgts_per_mshr = 5
is_top_level = True
class L2Cache(BaseCache): class L2Cache(BaseCache):
assoc = 8 assoc = 8
@ -49,6 +50,7 @@ class PageTableWalkerCache(BaseCache):
mshrs = 10 mshrs = 10
size = '1kB' size = '1kB'
tgts_per_mshr = 12 tgts_per_mshr = 12
is_top_level = True
class IOCache(BaseCache): class IOCache(BaseCache):
assoc = 8 assoc = 8
@ -58,3 +60,4 @@ class IOCache(BaseCache):
size = '1kB' size = '1kB'
tgts_per_mshr = 12 tgts_per_mshr = 12
forward_snoops = False forward_snoops = False
is_top_level = True

View file

@ -112,6 +112,9 @@ DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
{ {
DPRINTF(Fetch, "Received timing\n"); DPRINTF(Fetch, "Received timing\n");
if (pkt->isResponse()) { if (pkt->isResponse()) {
// We shouldn't ever get a block in ownership state
assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
fetch->processCacheCompletion(pkt); fetch->processCacheCompletion(pkt);
} }
//else Snooped a coherence request, just return //else Snooped a coherence request, just return

View file

@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt)
assert(pendingCount >= 0); assert(pendingCount >= 0);
assert(state); assert(state);
// We shouldn't ever get a block in ownership state
assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
state->numBytes += pkt->req->getSize(); state->numBytes += pkt->req->getSize();
assert(state->totBytes >= state->numBytes); assert(state->totBytes >= state->numBytes);
if (state->totBytes == state->numBytes) { if (state->totBytes == state->numBytes) {

View file

@ -48,6 +48,7 @@ class BaseCache(MemObject):
size = Param.MemorySize("capacity in bytes") size = Param.MemorySize("capacity in bytes")
forward_snoops = Param.Bool(True, forward_snoops = Param.Bool(True,
"forward snoops from mem side to cpu side") "forward snoops from mem side to cpu side")
is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)")
subblock_size = Param.Int(0, subblock_size = Param.Int(0,
"Size of subblock in IIC used for compression") "Size of subblock in IIC used for compression")
tgts_per_mshr = Param.Int("max number of accesses per MSHR") tgts_per_mshr = Param.Int("max number of accesses per MSHR")

View file

@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p)
hitLatency(p->latency), hitLatency(p->latency),
numTarget(p->tgts_per_mshr), numTarget(p->tgts_per_mshr),
forwardSnoops(p->forward_snoops), forwardSnoops(p->forward_snoops),
isTopLevel(p->is_top_level),
blocked(0), blocked(0),
noTargetMSHR(NULL), noTargetMSHR(NULL),
missCount(p->max_miss_count), missCount(p->max_miss_count),

View file

@ -194,6 +194,11 @@ class BaseCache : public MemObject
/** Do we forward snoops from mem side port through to cpu side port? */ /** Do we forward snoops from mem side port through to cpu side port? */
bool forwardSnoops; bool forwardSnoops;
/** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should
* never try to forward ownership and similar optimizations to the cpu
* side */
bool isTopLevel;
/** /**
* Bit vector of the blocking reasons for the access path. * Bit vector of the blocking reasons for the access path.
* @sa #BlockedCause * @sa #BlockedCause

View file

@ -216,7 +216,7 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk,
if (blk->isDirty()) { if (blk->isDirty()) {
// special considerations if we're owner: // special considerations if we're owner:
if (!deferred_response) { if (!deferred_response && !isTopLevel) {
// if we are responding immediately and can // if we are responding immediately and can
// signal that we're transferring ownership // signal that we're transferring ownership
// along with exclusivity, do so // along with exclusivity, do so

View file

@ -37,8 +37,12 @@ class MyCache(BaseCache):
mshrs = 10 mshrs = 10
tgts_per_mshr = 5 tgts_per_mshr = 5
class MyL1Cache(MyCache):
is_top_level = True
cpu = InOrderCPU(cpu_id=0) cpu = InOrderCPU(cpu_id=0)
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
MyL1Cache(size = '256kB'),
MyCache(size = '2MB', latency='10ns')) MyCache(size = '2MB', latency='10ns'))
cpu.clock = '2GHz' cpu.clock = '2GHz'

View file

@ -38,6 +38,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 12 mshrs = 12
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -39,6 +39,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -37,8 +37,12 @@ class MyCache(BaseCache):
mshrs = 10 mshrs = 10
tgts_per_mshr = 5 tgts_per_mshr = 5
class MyL1Cache(MyCache):
is_top_level = True
cpu = DerivO3CPU(cpu_id=0) cpu = DerivO3CPU(cpu_id=0)
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
MyL1Cache(size = '256kB'),
MyCache(size = '2MB')) MyCache(size = '2MB'))
cpu.clock = '2GHz' cpu.clock = '2GHz'

View file

@ -43,6 +43,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -65,6 +66,7 @@ class PageTableWalkerCache(BaseCache):
mshrs = 10 mshrs = 10
size = '1kB' size = '1kB'
tgts_per_mshr = 12 tgts_per_mshr = 12
is_top_level = True
# --------------------- # ---------------------
# I/O Cache # I/O Cache
@ -78,6 +80,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range = AddrRange(0, size=mem_size) addr_range = AddrRange(0, size=mem_size)
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpu = AtomicSimpleCPU(cpu_id=0) cpu = AtomicSimpleCPU(cpu_id=0)

View file

@ -44,6 +44,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -40,6 +40,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -41,6 +41,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -38,6 +38,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -38,6 +38,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache

View file

@ -36,8 +36,12 @@ class MyCache(BaseCache):
mshrs = 10 mshrs = 10
tgts_per_mshr = 5 tgts_per_mshr = 5
class MyL1Cache(MyCache):
is_top_level = True
cpu = TimingSimpleCPU(cpu_id=0) cpu = TimingSimpleCPU(cpu_id=0)
cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
MyL1Cache(size = '256kB'),
MyCache(size = '2MB', latency='10ns')) MyCache(size = '2MB', latency='10ns'))
system = System(cpu = cpu, system = System(cpu = cpu,
physmem = PhysicalMemory(), physmem = PhysicalMemory(),

View file

@ -41,6 +41,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -65,6 +66,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ] cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ]

View file

@ -41,6 +41,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -65,6 +66,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpu = DerivO3CPU(cpu_id=0) cpu = DerivO3CPU(cpu_id=0)

View file

@ -40,6 +40,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -64,6 +65,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ]

View file

@ -40,6 +40,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -64,6 +65,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpu = AtomicSimpleCPU(cpu_id=0) cpu = AtomicSimpleCPU(cpu_id=0)

View file

@ -40,6 +40,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -64,6 +65,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ]

View file

@ -41,6 +41,7 @@ class L1(BaseCache):
block_size = 64 block_size = 64
mshrs = 4 mshrs = 4
tgts_per_mshr = 8 tgts_per_mshr = 8
is_top_level = True
# ---------------------- # ----------------------
# Base L2 Cache # Base L2 Cache
@ -65,6 +66,7 @@ class IOCache(BaseCache):
tgts_per_mshr = 12 tgts_per_mshr = 12
addr_range=AddrRange(0, size='8GB') addr_range=AddrRange(0, size='8GB')
forward_snoops = False forward_snoops = False
is_top_level = True
#cpu #cpu
cpu = TimingSimpleCPU(cpu_id=0) cpu = TimingSimpleCPU(cpu_id=0)