cache: Make caches sharing-aware and add occupancy stats.
On the config end, if a shared L2 is created for the system, it is parameterized to have n sharers as defined by options.num_cpus. Besides making the cache sharing-aware so that discriminating tag policies can use context_ids in their decisions, I added an occupancy AverageStat and an occ % stat to each cache, so you can see how much cache each context occupies on average, both in blocks and as a percentage.

Note that devices have context_id -1, so an array of occupancy stats indexed by context_id would break here; in FS mode I therefore add an extra bucket for device blocks. This bucket is deliberately not added in SE mode, both to avoid clutter in the stats.txt file and to avoid broken stats (some formulas break when a bucket is 0).
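As a quick illustration of the new stats (the numbers here are only an example, not taken from any run): occ_blocks is an AverageVector with one bucket per sharer (plus the device bucket in FS mode), and occ_% is the formula occ_% = occ_blocks / numBlocks. So for a 4MB cache, assuming 64-byte blocks (65536 blocks total), a context averaging 16384 occupied blocks would report an occ_% of 0.25.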
parent be4cf50c5a
commit 1d3228481f
20 changed files with 126 additions and 23 deletions

@@ -151,6 +151,7 @@ if options.l2cache:
     system.tol2bus = Bus()
     system.l2.cpu_side = system.tol2bus.port
     system.l2.mem_side = system.membus.port
+    system.l2.num_cpus = np

 for i in xrange(np):
     if options.caches:

src/mem/cache/BaseCache.py (1 line changed)
@@ -44,6 +44,7 @@ class BaseCache(MemObject):
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
     repl = Param.Repl(NULL, "replacement policy")
+    num_cpus = Param.Int(1, "number of cpus sharing this cache")
     size = Param.MemorySize("capacity in bytes")
     forward_snoops = Param.Bool(True,
         "forward snoops from mem side to cpu side")

src/mem/cache/base.cc (15 lines changed)
@@ -62,7 +62,8 @@ BaseCache::BaseCache(const Params *p)
       noTargetMSHR(NULL),
       missCount(p->max_miss_count),
       drainEvent(NULL),
-      addrRange(p->addr_range)
+      addrRange(p->addr_range),
+      _numCpus(p->num_cpus)
 {
 }

@@ -148,7 +149,11 @@ BaseCache::regStats()
         const string &cstr = cmd.toString();

         hits[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_hits")
             .desc("number of " + cstr + " hits")
             .flags(total | nozero | nonan)
@@ -185,7 +190,11 @@ BaseCache::regStats()
         const string &cstr = cmd.toString();

         misses[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_misses")
             .desc("number of " + cstr + " misses")
             .flags(total | nozero | nonan)

src/mem/cache/base.hh (48 lines changed)
@@ -47,6 +47,7 @@
 #include "base/statistics.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
+#include "config/full_system.hh"
 #include "mem/cache/mshr_queue.hh"
 #include "mem/mem_object.hh"
 #include "mem/packet.hh"
@@ -219,7 +220,11 @@ class BaseCache : public MemObject
      * Normally this is all possible memory addresses. */
     Range<Addr> addrRange;

+    /** number of cpus sharing this cache - from config file */
+    int _numCpus;
+
   public:
+    int numCpus() { return _numCpus; }
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -481,9 +486,25 @@ class BaseCache : public MemObject

     virtual bool inMissQueue(Addr addr) = 0;

-    void incMissCount(PacketPtr pkt)
+    void incMissCount(PacketPtr pkt, int id)
     {
-        misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            misses[pkt->cmdToIndex()][0]++;
+            /* same thing for writeback hits as misses - no context id
+             * available, meanwhile writeback hit/miss stats are not used
+             * in any aggregate hit/miss calculations, so just lump them all
+             * in bucket 0 */
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            misses[pkt->cmdToIndex()][id % _numCpus]++;
+        }

         if (missCount) {
             --missCount;
@@ -491,6 +512,29 @@ class BaseCache : public MemObject
             exitSimLoop("A cache reached the maximum miss count");
         }
     }
+    void incHitCount(PacketPtr pkt, int id)
+    {
+
+        /* Writeback requests don't have a context id associated with
+         * them, so attributing a hit to a -1 context id is obviously a
+         * problem. I've noticed in the stats that hits are split into
+         * demand and non-demand hits - neither of which include writeback
+         * hits, so here, I'll just put the writeback hits into bucket 0
+         * since it won't mess with any other stats -hsul */
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            /* the % is necessary in case there are switch cpus */
+            hits[pkt->cmdToIndex()][id % _numCpus]++;
+        }
+    }

 };

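For reference, the bucket selection applied by incMissCount() and incHitCount() above amounts to the following standalone sketch (bucketIndex is a hypothetical helper written purely for illustration; it is not part of this patch):

    // Hypothetical helper, illustration only: mirrors the bucketing above.
    // Writebacks carry no context id, so they are lumped into bucket 0;
    // in full-system mode, device accesses (id == -1) get their own bucket
    // at index numCpus; everything else is folded modulo numCpus so that
    // switch cpus map onto the same per-cpu buckets.
    int bucketIndex(int id, int numCpus, bool isWriteback, bool fullSystem)
    {
        if (isWriteback)
            return 0;
        if (fullSystem && id == -1)
            return numCpus;
        return id % numCpus;
    }
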
src/mem/cache/blk.hh (5 lines changed)
@@ -104,6 +104,9 @@ class CacheBlk
     /** Number of references to this block since it was brought in. */
     int refCount;

+    /** holds the context source ID of the requestor for this block. */
+    int contextSrc;
+
   protected:
     /**
      * Represents that the indicated thread context has a "lock" on
@@ -133,7 +136,7 @@ class CacheBlk

     CacheBlk()
         : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
-          set(-1), isTouched(false), refCount(0)
+          set(-1), isTouched(false), refCount(0), contextSrc(-1)
     {}

     /**

src/mem/cache/cache_impl.hh (8 lines changed)
@@ -277,7 +277,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,

         if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
             // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            incHitCount(pkt, id);
             satisfyCpuSideRequest(pkt, blk);
             return true;
         }
@@ -297,7 +297,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
         if (blk == NULL) {
             // no replaceable block available, give up.
             // writeback will be forwarded to next level.
-            incMissCount(pkt);
+            incMissCount(pkt, id);
             return false;
         }
     int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
@@ -308,11 +308,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
         blk->status |= BlkDirty;
         // nothing else to do; writeback doesn't expect response
         assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        incHitCount(pkt, id);
         return true;
     }

-    incMissCount(pkt);
+    incMissCount(pkt, id);

     if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
         // complete miss on store conditional... just give up now

src/mem/cache/tags/base.cc (15 lines changed)
@@ -87,5 +87,20 @@ BaseTags::regStats(const string &name)
         .desc("Cycle when the warmup percentage was hit.")
         ;

+    occupancies
+        .init(cache->numCpus())
+        .name(name + ".occ_blocks")
+        .desc("Average occupied blocks per context")
+        .flags(nozero | nonan)
+        ;
+
+    avgOccs
+        .name(name + ".occ_%")
+        .desc("Average percentage of cache occupancy")
+        .flags(nozero)
+        ;
+
+    avgOccs = occupancies / Stats::constant(numBlocks);
+
     registerExitCallback(new BaseTagsCallback(this));
 }

src/mem/cache/tags/base.hh (10 lines changed)
@@ -63,6 +63,9 @@ class BaseTags
     /** Marked true when the cache is warmed up. */
     bool warmedUp;

+    /** the number of blocks in the cache */
+    unsigned numBlocks;
+
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -92,6 +95,13 @@ class BaseTags

     /** The cycle that the warmup percentage was hit. */
     Stats::Scalar warmupCycle;

+    /** Average occupancy of each context/cpu using the cache */
+    Stats::AverageVector occupancies;
+
+    /** Average occ % of each context/cpu using the cache */
+    Stats::Formula avgOccs;
+
     /**
      * @}
      */

src/mem/cache/tags/fa_lru.cc (14 lines changed)
@@ -43,8 +43,7 @@
 using namespace std;

 FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
-    : blkSize(_blkSize), size(_size),
-      numBlks(size/blkSize), hitLatency(hit_latency)
+    : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
 {
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -65,23 +64,24 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)

     warmedUp = false;
     warmupBound = size/blkSize;
+    numBlocks = size/blkSize;

-    blks = new FALRUBlk[numBlks];
+    blks = new FALRUBlk[numBlocks];
     head = &(blks[0]);
-    tail = &(blks[numBlks-1]);
+    tail = &(blks[numBlocks-1]);

     head->prev = NULL;
     head->next = &(blks[1]);
     head->inCache = cacheMask;

-    tail->prev = &(blks[numBlks-2]);
+    tail->prev = &(blks[numBlocks-2]);
     tail->next = NULL;
     tail->inCache = 0;

     unsigned index = (1 << 17) / blkSize;
     unsigned j = 0;
     int flags = cacheMask;
-    for (unsigned i = 1; i < numBlks - 1; i++) {
+    for (unsigned i = 1; i < numBlocks - 1; i++) {
         blks[i].inCache = flags;
         if (i == index - 1){
             cacheBoundaries[j] = &(blks[i]);
@@ -94,7 +94,7 @@ FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
         blks[i].isTouched = false;
     }
     assert(j == numCaches);
-    assert(index == numBlks);
+    assert(index == numBlocks);
     //assert(check());
 }

src/mem/cache/tags/fa_lru.hh (2 lines changed)
@@ -84,8 +84,6 @@ class FALRU : public BaseTags
     const unsigned blkSize;
     /** The size of the cache. */
     const unsigned size;
-    /** The number of blocks in the cache. */
-    const unsigned numBlks; // calculated internally
     /** The hit latency of the cache. */
     const unsigned hitLatency;

src/mem/cache/tags/iic.cc (2 lines changed)
@@ -60,7 +60,6 @@ IIC::IIC(IIC::Params &params) :
     tagShift(floorLog2(blkSize)), blkMask(blkSize - 1),
     subShift(floorLog2(subSize)), subMask(numSub - 1),
     hashDelay(params.hashDelay),
-    numBlocks(params.size/subSize),
     numTags(hashSets * assoc + params.size/blkSize -1),
     numSecondary(params.size/blkSize),
     tagNull(numTags),
@@ -88,6 +87,7 @@ IIC::IIC(IIC::Params &params) :

     warmedUp = false;
     warmupBound = params.size/blkSize;
+    numBlocks = params.size/subSize;

     // Replacement Policy Initialization
     repl = params.rp;

src/mem/cache/tags/iic.hh (2 lines changed)
@@ -197,8 +197,6 @@ class IIC : public BaseTags

     /** The latency of a hash lookup. */
     const unsigned hashDelay;
-    /** The number of data blocks. */
-    const unsigned numBlocks;
     /** The total number of tags in primary and secondary. */
     const unsigned numTags;
     /** The number of tags in the secondary tag store. */

src/mem/cache/tags/lru.cc (19 lines changed)
@@ -74,7 +74,8 @@ LRU::LRU(unsigned _numSets, unsigned _blkSize, unsigned _assoc,
     sets = new CacheSet[numSets];
     blks = new BlkType[numSets * assoc];
     // allocate data storage in one big chunk
-    dataBlks = new uint8_t[numSets*assoc*blkSize];
+    numBlocks = numSets * assoc;
+    dataBlks = new uint8_t[numBlocks * blkSize];

     unsigned blkIndex = 0;       // index into blks array
     for (unsigned i = 0; i < numSets; ++i) {
@@ -157,6 +158,12 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
         ++sampledRefs;
         blk->refCount = 0;

+        // deal with evicted block
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
+
         DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                 set, regenerateBlkAddr(blk->tag, set));
     }
@@ -178,6 +185,12 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
     // Set tag for new block. Caller is responsible for setting status.
     blk->tag = extractTag(addr);

+    // deal with what we are bringing in
+    if (context_src != -1) {
+        occupancies[context_src % cache->numCpus()]++;
+        blk->contextSrc = context_src;
+    }
+
     unsigned set = extractSet(addr);
     sets[set].moveToHead(blk);
 }
@@ -190,6 +203,10 @@ LRU::invalidateBlk(BlkType *blk)
         blk->isTouched = false;
         blk->clearLoadLocks();
         tagsInUse--;
+        if (blk->contextSrc != -1) {
+            occupancies[blk->contextSrc % cache->numCpus()]--;
+            blk->contextSrc = -1;
+        }
     }
 }

@@ -63,6 +63,7 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(),
 system.toL2Bus = Bus(clock="500GHz", width=16)
 system.l2c = L2(size='64kB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores

 # connect l2c to membus
 system.l2c.mem_side = system.membus.port

@@ -62,6 +62,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores

 # connect l2c to membus
 system.l2c.mem_side = system.membus.port

@@ -61,6 +61,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores

 # connect l2c to membus
 system.l2c.mem_side = system.membus.port

@@ -61,6 +61,7 @@ Bus())
 system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
+system.l2c.num_cpus = nb_cores

 # connect l2c to membus
 system.l2c.mem_side = system.membus.port

@@ -85,6 +85,7 @@ system.iocache.mem_side = system.membus.port
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2

 #connect up the cpu and l1s
 for c in cpus:

@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2

 #connect up the cpu and l1s
 for c in cpus:

@@ -83,6 +83,7 @@ system.toL2Bus = Bus()
 system.l2c = L2(size='4MB', assoc=8)
 system.l2c.cpu_side = system.toL2Bus.port
 system.l2c.mem_side = system.membus.port
+system.l2c.num_cpus = 2

 #connect up the cpu and l1s
 for c in cpus: