mem: Add snoop filters to L2 crossbars, and check size

This patch adds a snoop filter to the L2XBar. For now we refrain from
globally adding a snoop filter to the SystemXBar, since the latter is
also used in systems without caches. In scenarios without caches the
snoop filter will not see any writeback/clean evicts from the CPU
ports, despite the fact that they are snooping. To avoid inadvertent
use of the snoop filter in these cases we leave it out for now.

A size check is added to the snoop filter, merely to ensure it does
not grow beyond the total capacity of the caches above it. The size
has to be set manually, and a value of 8 MByte is chosen as a suitably
high default.
This commit is contained in:
Ali Jafri 2015-09-25 07:26:57 -04:00
parent 0c5a98f9d1
commit 3aa87251d7
3 changed files with 16 additions and 1 deletions

View file

@ -112,6 +112,9 @@ class SnoopFilter(SimObject):
system = Param.System(Parent.any, "System that the crossbar belongs to.") system = Param.System(Parent.any, "System that the crossbar belongs to.")
# Sanity check on max capacity to track, adjust if needed.
max_capacity = Param.MemorySize('8MB', "Maximum capacity of snoop filter")
# We use a coherent crossbar to connect multiple masters to the L2 # We use a coherent crossbar to connect multiple masters to the L2
# caches. Normally this crossbar would be part of the cache itself. # caches. Normally this crossbar would be part of the cache itself.
class L2XBar(CoherentXBar): class L2XBar(CoherentXBar):
@ -125,6 +128,11 @@ class L2XBar(CoherentXBar):
response_latency = 1 response_latency = 1
snoop_response_latency = 1 snoop_response_latency = 1
# Use a snoop-filter by default, and set the latency to zero as
# the lookup is assumed to overlap with the frontend latency of
# the crossbar
snoop_filter = SnoopFilter(lookup_latency = 0)
# One of the key coherent crossbar instances is the system # One of the key coherent crossbar instances is the system
# interconnect, tying together the CPU clusters, GPUs, and any I/O # interconnect, tying together the CPU clusters, GPUs, and any I/O
# coherent masters, and DRAM controllers. # coherent masters, and DRAM controllers.

View file

@ -184,6 +184,10 @@ SnoopFilter::lookupSnoop(const Packet* cpkt)
auto sf_it = cachedLocations.find(line_addr); auto sf_it = cachedLocations.find(line_addr);
bool is_hit = (sf_it != cachedLocations.end()); bool is_hit = (sf_it != cachedLocations.end());
panic_if(!is_hit && (cachedLocations.size() >= maxEntryCount),
"snoop filter exceeded capacity of %d cache blocks\n",
maxEntryCount);
// If the snoop filter has no entry and its an uncacheable // If the snoop filter has no entry and its an uncacheable
// request, do not create a new snoop filter entry, simply return // request, do not create a new snoop filter entry, simply return
// a NULL portlist. // a NULL portlist.

View file

@ -90,7 +90,8 @@ class SnoopFilter : public SimObject {
SnoopFilter (const SnoopFilterParams *p) : SnoopFilter (const SnoopFilterParams *p) :
SimObject(p), reqLookupResult(cachedLocations.end()), retryItem{0, 0}, SimObject(p), reqLookupResult(cachedLocations.end()), retryItem{0, 0},
linesize(p->system->cacheLineSize()), lookupLatency(p->lookup_latency) linesize(p->system->cacheLineSize()), lookupLatency(p->lookup_latency),
maxEntryCount(p->max_capacity / p->system->cacheLineSize())
{ {
} }
@ -254,6 +255,8 @@ class SnoopFilter : public SimObject {
const unsigned linesize; const unsigned linesize;
/** Latency for doing a lookup in the filter */ /** Latency for doing a lookup in the filter */
const Cycles lookupLatency; const Cycles lookupLatency;
/** Max capacity in terms of cache blocks tracked, for sanity checking */
const unsigned maxEntryCount;
/** Statistics */ /** Statistics */
Stats::Scalar totRequests; Stats::Scalar totRequests;