diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index e9e60859f..4389eb356 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -27,7 +27,7 @@ # Authors: Nathan Binkert from m5.params import * -from m5.proxy import Self +from m5.proxy import * from MemObject import MemObject from Prefetcher import BasePrefetcher @@ -44,7 +44,6 @@ class BaseCache(MemObject): prioritizeRequests = Param.Bool(False, "always service demand misses first") repl = Param.Repl(NULL, "replacement policy") - num_cpus = Param.Int(1, "number of cpus sharing this cache") size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") @@ -62,3 +61,4 @@ class BaseCache(MemObject): cpu_side = Port("Port on side closer to CPU") mem_side = Port("Port on side closer to MEM") addr_range = Param.AddrRange(AllMemory, "The address range for the CPU-side port") + system = Param.System(Parent.any, "System we belong to") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index b0fb3bc6c..27ff6961b 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -65,7 +65,7 @@ BaseCache::BaseCache(const Params *p) missCount(p->max_miss_count), drainEvent(NULL), addrRange(p->addr_range), - _numCpus(p->num_cpus) + system(p->system) { } @@ -143,11 +143,14 @@ BaseCache::regStats() const string &cstr = cmd.toString(); hits[access_idx] - .init(FullSystem ? (_numCpus + 1) : _numCpus) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_hits") .desc("number of " + cstr + " hits") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + hits[access_idx].subname(i, system->getMasterName(i)); + } } // These macros make it easier to sum the right subset of commands and @@ -163,16 +166,22 @@ BaseCache::regStats() demandHits .name(name() + ".demand_hits") .desc("number of demand (read+write) hits") - .flags(total) + .flags(total | nozero | nonan) ; demandHits = SUM_DEMAND(hits); + for (int i = 0; i < system->maxMasters(); i++) { + demandHits.subname(i, system->getMasterName(i)); + } overallHits .name(name() + ".overall_hits") .desc("number of overall hits") - .flags(total) + .flags(total | nozero | nonan) ; overallHits = demandHits + SUM_NON_DEMAND(hits); + for (int i = 0; i < system->maxMasters(); i++) { + overallHits.subname(i, system->getMasterName(i)); + } // Miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -180,26 +189,35 @@ BaseCache::regStats() const string &cstr = cmd.toString(); misses[access_idx] - .init(FullSystem ? (_numCpus + 1) : _numCpus) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_misses") .desc("number of " + cstr + " misses") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + misses[access_idx].subname(i, system->getMasterName(i)); + } } demandMisses .name(name() + ".demand_misses") .desc("number of demand (read+write) misses") - .flags(total) + .flags(total | nozero | nonan) ; demandMisses = SUM_DEMAND(misses); + for (int i = 0; i < system->maxMasters(); i++) { + demandMisses.subname(i, system->getMasterName(i)); + } overallMisses .name(name() + ".overall_misses") .desc("number of overall misses") - .flags(total) + .flags(total | nozero | nonan) ; overallMisses = demandMisses + SUM_NON_DEMAND(misses); + for (int i = 0; i < system->maxMasters(); i++) { + overallMisses.subname(i, system->getMasterName(i)); + } // Miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -207,26 +225,35 @@ BaseCache::regStats() const string &cstr = cmd.toString(); missLatency[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_miss_latency") .desc("number of " + cstr + " miss cycles") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + missLatency[access_idx].subname(i, system->getMasterName(i)); + } } demandMissLatency .name(name() + ".demand_miss_latency") .desc("number of demand (read+write) miss cycles") - .flags(total) + .flags(total | nozero | nonan) ; demandMissLatency = SUM_DEMAND(missLatency); + for (int i = 0; i < system->maxMasters(); i++) { + demandMissLatency.subname(i, system->getMasterName(i)); + } overallMissLatency .name(name() + ".overall_miss_latency") .desc("number of overall miss cycles") - .flags(total) + .flags(total | nozero | nonan) ; overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); + for (int i = 0; i < system->maxMasters(); i++) { + overallMissLatency.subname(i, system->getMasterName(i)); + } // access formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -238,23 +265,32 @@ BaseCache::regStats() .desc("number of " + cstr + " accesses(hits+misses)") .flags(total | nozero | nonan) ; - accesses[access_idx] = hits[access_idx] + misses[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + accesses[access_idx].subname(i, system->getMasterName(i)); + } } demandAccesses .name(name() + ".demand_accesses") .desc("number of demand (read+write) accesses") - .flags(total) + .flags(total | nozero | nonan) ; demandAccesses = demandHits + demandMisses; + for (int i = 0; i < system->maxMasters(); i++) { + demandAccesses.subname(i, system->getMasterName(i)); + } overallAccesses .name(name() + ".overall_accesses") .desc("number of overall (read+write) accesses") - .flags(total) + .flags(total | nozero | nonan) ; overallAccesses = overallHits + overallMisses; + for (int i = 0; i < system->maxMasters(); i++) { + overallAccesses.subname(i, system->getMasterName(i)); + } // miss rate formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ 
-266,23 +302,32 @@ BaseCache::regStats() .desc("miss rate for " + cstr + " accesses") .flags(total | nozero | nonan) ; - missRate[access_idx] = misses[access_idx] / accesses[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + missRate[access_idx].subname(i, system->getMasterName(i)); + } } demandMissRate .name(name() + ".demand_miss_rate") .desc("miss rate for demand accesses") - .flags(total) + .flags(total | nozero | nonan) ; demandMissRate = demandMisses / demandAccesses; + for (int i = 0; i < system->maxMasters(); i++) { + demandMissRate.subname(i, system->getMasterName(i)); + } overallMissRate .name(name() + ".overall_miss_rate") .desc("miss rate for overall accesses") - .flags(total) + .flags(total | nozero | nonan) ; overallMissRate = overallMisses / overallAccesses; + for (int i = 0; i < system->maxMasters(); i++) { + overallMissRate.subname(i, system->getMasterName(i)); + } // miss latency formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -294,24 +339,33 @@ BaseCache::regStats() .desc("average " + cstr + " miss latency") .flags(total | nozero | nonan) ; - avgMissLatency[access_idx] = missLatency[access_idx] / misses[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + avgMissLatency[access_idx].subname(i, system->getMasterName(i)); + } } demandAvgMissLatency .name(name() + ".demand_avg_miss_latency") .desc("average overall miss latency") - .flags(total) + .flags(total | nozero | nonan) ; demandAvgMissLatency = demandMissLatency / demandMisses; + for (int i = 0; i < system->maxMasters(); i++) { + demandAvgMissLatency.subname(i, system->getMasterName(i)); + } overallAvgMissLatency .name(name() + ".overall_avg_miss_latency") .desc("average overall miss latency") - .flags(total) + .flags(total | nozero | nonan) ; overallAvgMissLatency = overallMissLatency / overallMisses; + for (int i = 0; i < system->maxMasters(); i++) { + overallAvgMissLatency.subname(i, system->getMasterName(i)); + } 
blocked_cycles.init(NUM_BLOCKED_CAUSES); blocked_cycles @@ -350,11 +404,14 @@ BaseCache::regStats() ; writebacks - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + ".writebacks") .desc("number of writebacks") - .flags(total) + .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + writebacks.subname(i, system->getMasterName(i)); + } // MSHR statistics // MSHR hit statistics @@ -363,26 +420,35 @@ BaseCache::regStats() const string &cstr = cmd.toString(); mshr_hits[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." + cstr + "_mshr_hits") .desc("number of " + cstr + " MSHR hits") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_hits[access_idx].subname(i, system->getMasterName(i)); + } } demandMshrHits .name(name() + ".demand_mshr_hits") .desc("number of demand (read+write) MSHR hits") - .flags(total) + .flags(total | nozero | nonan) ; demandMshrHits = SUM_DEMAND(mshr_hits); + for (int i = 0; i < system->maxMasters(); i++) { + demandMshrHits.subname(i, system->getMasterName(i)); + } overallMshrHits .name(name() + ".overall_mshr_hits") .desc("number of overall MSHR hits") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits); + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrHits.subname(i, system->getMasterName(i)); + } // MSHR miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -390,26 +456,35 @@ BaseCache::regStats() const string &cstr = cmd.toString(); mshr_misses[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_mshr_misses") .desc("number of " + cstr + " MSHR misses") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_misses[access_idx].subname(i, system->getMasterName(i)); + } } demandMshrMisses .name(name() + ".demand_mshr_misses") .desc("number of demand (read+write) MSHR misses") - .flags(total) + .flags(total | nozero | nonan) ; demandMshrMisses = SUM_DEMAND(mshr_misses); + for (int i = 0; i < system->maxMasters(); i++) { + demandMshrMisses.subname(i, system->getMasterName(i)); + } overallMshrMisses .name(name() + ".overall_mshr_misses") .desc("number of overall MSHR misses") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses); + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrMisses.subname(i, system->getMasterName(i)); + } // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -417,27 +492,36 @@ BaseCache::regStats() const string &cstr = cmd.toString(); mshr_miss_latency[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_mshr_miss_latency") .desc("number of " + cstr + " MSHR miss cycles") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_miss_latency[access_idx].subname(i, system->getMasterName(i)); + } } demandMshrMissLatency .name(name() + ".demand_mshr_miss_latency") .desc("number of demand (read+write) MSHR miss cycles") - .flags(total) + .flags(total | nozero | nonan) ; demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency); + for (int i = 0; i < system->maxMasters(); i++) { + demandMshrMissLatency.subname(i, system->getMasterName(i)); + } overallMshrMissLatency .name(name() + ".overall_mshr_miss_latency") .desc("number of overall MSHR miss cycles") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrMissLatency = demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency); + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrMissLatency.subname(i, system->getMasterName(i)); + } // MSHR uncacheable statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -445,20 +529,26 @@ BaseCache::regStats() const string &cstr = cmd.toString(); mshr_uncacheable[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." 
+ cstr + "_mshr_uncacheable") .desc("number of " + cstr + " MSHR uncacheable") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_uncacheable[access_idx].subname(i, system->getMasterName(i)); + } } overallMshrUncacheable .name(name() + ".overall_mshr_uncacheable_misses") .desc("number of overall MSHR uncacheable misses") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrUncacheable = SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable); + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrUncacheable.subname(i, system->getMasterName(i)); + } // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -466,21 +556,27 @@ BaseCache::regStats() const string &cstr = cmd.toString(); mshr_uncacheable_lat[access_idx] - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + "." + cstr + "_mshr_uncacheable_latency") .desc("number of " + cstr + " MSHR uncacheable cycles") .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_uncacheable_lat[access_idx].subname(i, system->getMasterName(i)); + } } overallMshrUncacheableLatency .name(name() + ".overall_mshr_uncacheable_latency") .desc("number of overall MSHR uncacheable cycles") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrUncacheableLatency = SUM_DEMAND(mshr_uncacheable_lat) + SUM_NON_DEMAND(mshr_uncacheable_lat); + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrUncacheableLatency.subname(i, system->getMasterName(i)); + } #if 0 // MSHR access formulas @@ -524,24 +620,33 @@ BaseCache::regStats() .desc("mshr miss rate for " + cstr + " accesses") .flags(total | nozero | nonan) ; - mshrMissRate[access_idx] = mshr_misses[access_idx] / accesses[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + mshrMissRate[access_idx].subname(i, system->getMasterName(i)); + } } demandMshrMissRate .name(name() + 
".demand_mshr_miss_rate") .desc("mshr miss rate for demand accesses") - .flags(total) + .flags(total | nozero | nonan) ; demandMshrMissRate = demandMshrMisses / demandAccesses; + for (int i = 0; i < system->maxMasters(); i++) { + demandMshrMissRate.subname(i, system->getMasterName(i)); + } overallMshrMissRate .name(name() + ".overall_mshr_miss_rate") .desc("mshr miss rate for overall accesses") - .flags(total) + .flags(total | nozero | nonan) ; overallMshrMissRate = overallMshrMisses / overallAccesses; + for (int i = 0; i < system->maxMasters(); i++) { + overallMshrMissRate.subname(i, system->getMasterName(i)); + } // mshrMiss latency formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -553,24 +658,33 @@ BaseCache::regStats() .desc("average " + cstr + " mshr miss latency") .flags(total | nozero | nonan) ; - avgMshrMissLatency[access_idx] = mshr_miss_latency[access_idx] / mshr_misses[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + avgMshrMissLatency[access_idx].subname(i, system->getMasterName(i)); + } } demandAvgMshrMissLatency .name(name() + ".demand_avg_mshr_miss_latency") .desc("average overall mshr miss latency") - .flags(total) + .flags(total | nozero | nonan) ; demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; + for (int i = 0; i < system->maxMasters(); i++) { + demandAvgMshrMissLatency.subname(i, system->getMasterName(i)); + } overallAvgMshrMissLatency .name(name() + ".overall_avg_mshr_miss_latency") .desc("average overall mshr miss latency") - .flags(total) + .flags(total | nozero | nonan) ; overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; + for (int i = 0; i < system->maxMasters(); i++) { + overallAvgMshrMissLatency.subname(i, system->getMasterName(i)); + } // mshrUncacheable latency formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -582,32 +696,44 @@ BaseCache::regStats() .desc("average " + cstr + " mshr uncacheable 
latency") .flags(total | nozero | nonan) ; - avgMshrUncacheableLatency[access_idx] = mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + + for (int i = 0; i < system->maxMasters(); i++) { + avgMshrUncacheableLatency[access_idx].subname(i, system->getMasterName(i)); + } } overallAvgMshrUncacheableLatency .name(name() + ".overall_avg_mshr_uncacheable_latency") .desc("average overall mshr uncacheable latency") - .flags(total) + .flags(total | nozero | nonan) ; overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; + for (int i = 0; i < system->maxMasters(); i++) { + overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i)); + } mshr_cap_events - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + ".mshr_cap_events") .desc("number of times MSHR cap was activated") - .flags(total) + .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + mshr_cap_events.subname(i, system->getMasterName(i)); + } //software prefetching stats soft_prefetch_mshr_full - .init(maxThreadsPerCPU) + .init(system->maxMasters()) .name(name() + ".soft_prefetch_mshr_full") .desc("number of mshr full events for SW prefetching instrutions") - .flags(total) + .flags(total | nozero | nonan) ; + for (int i = 0; i < system->maxMasters(); i++) { + soft_prefetch_mshr_full.subname(i, system->getMasterName(i)); + } mshr_no_allocate_misses .name(name() +".no_allocate_misses") diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 3aaed4455..cff8813cd 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -58,6 +58,7 @@ #include "sim/eventq.hh" #include "sim/full_system.hh" #include "sim/sim_exit.hh" +#include "sim/system.hh" class MSHR; /** @@ -220,11 +221,10 @@ class BaseCache : public MemObject * Normally this is all possible memory addresses. 
*/ Range addrRange; - /** number of cpus sharing this cache - from config file */ - int _numCpus; - public: - int numCpus() { return _numCpus; } + /** System we are currently operating in. */ + System *system; + // Statistics /** * @addtogroup CacheStatistics @@ -488,23 +488,10 @@ class BaseCache : public MemObject virtual bool inMissQueue(Addr addr) = 0; - void incMissCount(PacketPtr pkt, int id) + void incMissCount(PacketPtr pkt) { - - if (pkt->cmd == MemCmd::Writeback) { - assert(id == -1); - misses[pkt->cmdToIndex()][0]++; - /* same thing for writeback hits as misses - no context id - * available, meanwhile writeback hit/miss stats are not used - * in any aggregate hit/miss calculations, so just lump them all - * in bucket 0 */ - } else if (FullSystem && id == -1) { - // Device accesses have id -1 - // lump device accesses into their own bucket - misses[pkt->cmdToIndex()][_numCpus]++; - } else { - misses[pkt->cmdToIndex()][id % _numCpus]++; - } + assert(pkt->req->masterId() < system->maxMasters()); + misses[pkt->cmdToIndex()][pkt->req->masterId()]++; if (missCount) { --missCount; @@ -512,26 +499,11 @@ class BaseCache : public MemObject exitSimLoop("A cache reached the maximum miss count"); } } - void incHitCount(PacketPtr pkt, int id) + void incHitCount(PacketPtr pkt) { + assert(pkt->req->masterId() < system->maxMasters()); + hits[pkt->cmdToIndex()][pkt->req->masterId()]++; - /* Writeback requests don't have a context id associated with - * them, so attributing a hit to a -1 context id is obviously a - * problem. 
I've noticed in the stats that hits are split into - * demand and non-demand hits - neither of which include writeback - * hits, so here, I'll just put the writeback hits into bucket 0 - * since it won't mess with any other stats -hsul */ - if (pkt->cmd == MemCmd::Writeback) { - assert(id == -1); - hits[pkt->cmdToIndex()][0]++; - } else if (FullSystem && id == -1) { - // Device accesses have id -1 - // lump device accesses into their own bucket - hits[pkt->cmdToIndex()][_numCpus]++; - } else { - /* the % is necessary in case there are switch cpus */ - hits[pkt->cmdToIndex()][id % _numCpus]++; - } } }; diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh index e70760edd..91970e09b 100644 --- a/src/mem/cache/blk.hh +++ b/src/mem/cache/blk.hh @@ -103,8 +103,8 @@ class CacheBlk /** Number of references to this block since it was brought in. */ int refCount; - /** holds the context source ID of the requestor for this block. */ - int contextSrc; + /** holds the source requestor ID for this block. */ + int srcMasterId; protected: /** @@ -135,7 +135,8 @@ class CacheBlk CacheBlk() : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), - set(-1), isTouched(false), refCount(0), contextSrc(-1) + set(-1), isTouched(false), refCount(0), + srcMasterId(Request::invldMasterId) {} /** diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index fbab8465e..87b688617 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -312,7 +312,7 @@ Cache::access(PacketPtr pkt, BlkType *&blk, if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) { // OK to satisfy access - incHitCount(pkt, id); + incHitCount(pkt); satisfyCpuSideRequest(pkt, blk); return true; } @@ -332,10 +332,10 @@ Cache::access(PacketPtr pkt, BlkType *&blk, if (blk == NULL) { // no replaceable block available, give up. // writeback will be forwarded to next level. - incMissCount(pkt, id); + incMissCount(pkt); return false; } - int id = pkt->req->hasContextId() ? 
pkt->req->contextId() : -1; + int id = pkt->req->masterId(); tags->insertBlock(pkt->getAddr(), blk, id); blk->status = BlkValid | BlkReadable; } @@ -346,11 +346,11 @@ Cache::access(PacketPtr pkt, BlkType *&blk, } // nothing else to do; writeback doesn't expect response assert(!pkt->needsResponse()); - incHitCount(pkt, id); + incHitCount(pkt); return true; } - incMissCount(pkt, id); + incMissCount(pkt); if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) { // complete miss on store conditional... just give up now @@ -514,7 +514,8 @@ Cache::timingAccess(PacketPtr pkt) if (mshr) { // MSHR hit //@todo remove hw_pf here - mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + assert(pkt->req->masterId() < system->maxMasters()); + mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++; if (mshr->threadNum != 0/*pkt->req->threadId()*/) { mshr->threadNum = -1; } @@ -529,7 +530,8 @@ Cache::timingAccess(PacketPtr pkt) } } else { // no MSHR - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + assert(pkt->req->masterId() < system->maxMasters()); + mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++; // always mark as cache fill for now... if we implement // no-write-allocate or bypass accesses this will have to // be changed. @@ -849,10 +851,12 @@ Cache::handleResponse(PacketPtr pkt) PacketList writebacks; if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] += + assert(pkt->req->masterId() < system->maxMasters()); + mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] += miss_latency; } else { - mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] += + assert(pkt->req->masterId() < system->maxMasters()); + mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] += miss_latency; } @@ -898,7 +902,9 @@ Cache::handleResponse(PacketPtr pkt) (transfer_offset ? 
pkt->finishTime : pkt->firstWordTime); assert(!target->pkt->req->isUncacheable()); - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] += + // bounds-check the target's requestor id, which indexes the stat below + assert(target->pkt->req->masterId() < system->maxMasters()); + missLatency[target->pkt->cmdToIndex()][target->pkt->req->masterId()] += completion_time - target->recvTime; } else if (pkt->cmd == MemCmd::UpgradeFailResp) { // failed StoreCond upgrade @@ -1003,7 +1009,7 @@ Cache::writebackBlk(BlkType *blk) { assert(blk && blk->isValid() && blk->isDirty()); - writebacks[0/*pkt->req->threadId()*/]++; + writebacks[Request::wbMasterId]++; Request *writebackReq = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0, @@ -1082,7 +1088,7 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, tempBlock->tag = tags->extractTag(addr); DPRINTF(Cache, "using temp block for %x\n", addr); } else { - int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + int id = pkt->req->masterId(); tags->insertBlock(pkt->getAddr(), blk, id); } @@ -1427,7 +1433,8 @@ Cache::getNextMSHR() !writeBuffer.findMatch(pf_addr)) { // Update statistic on number of prefetches issued // (hwpf_mshr_misses) - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + assert(pkt->req->masterId() < system->maxMasters()); + mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++; // Don't request bus, since we already have it return allocateMissBuffer(pkt, curTick(), false); } else { diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc index ea97954f1..0cabce860 100644 --- a/src/mem/cache/tags/base.cc +++ b/src/mem/cache/tags/base.cc @@ -87,17 +87,23 @@ BaseTags::regStats(const string &name) ; occupancies - .init(cache->numCpus() + 1) + .init(cache->system->maxMasters()) .name(name + ".occ_blocks") - .desc("Average occupied blocks per context") + .desc("Average occupied blocks per requestor") .flags(nozero | nonan) ; + for (int i = 0; i < cache->system->maxMasters(); i++) { + occupancies.subname(i, 
cache->system->getMasterName(i)); + } avgOccs .name(name + ".occ_percent") .desc("Average percentage of cache occupancy") - .flags(nozero) + .flags(nozero | total) ; + for (int i = 0; i < cache->system->maxMasters(); i++) { + avgOccs.subname(i, cache->system->getMasterName(i)); + } avgOccs = occupancies / Stats::constant(numBlocks); diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index 93856c19e..576b512e5 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -97,10 +97,10 @@ class BaseTags /** The cycle that the warmup percentage was hit. */ Stats::Scalar warmupCycle; - /** Average occupancy of each context/cpu using the cache */ + /** Average occupancy of each requestor using the cache */ Stats::AverageVector occupancies; - /** Average occ % of each context/cpu using the cache */ + /** Average occ % of each requestor using the cache */ Stats::Formula avgOccs; /** diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 33f0f14a9..babcedc89 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -116,7 +116,7 @@ LRU::~LRU() } LRU::BlkType* -LRU::accessBlock(Addr addr, int &lat, int context_src) +LRU::accessBlock(Addr addr, int &lat, int master_id) { Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -153,20 +153,8 @@ LRU::findVictim(Addr addr, PacketList &writebacks) unsigned set = extractSet(addr); // grab a replacement candidate BlkType *blk = sets[set].blks[assoc-1]; + if (blk->isValid()) { - replacements[0]++; - totalRefs += blk->refCount; - ++sampledRefs; - blk->refCount = 0; - - // deal with evicted block - if (blk->contextSrc != -1) { - occupancies[blk->contextSrc % cache->numCpus()]--; - blk->contextSrc = -1; - } else { - occupancies[cache->numCpus()]--; - } - DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n", set, regenerateBlkAddr(blk->tag, set)); } @@ -174,7 +162,7 @@ LRU::findVictim(Addr addr, PacketList &writebacks) } void -LRU::insertBlock(Addr 
addr, BlkType *blk, int context_src) +LRU::insertBlock(Addr addr, BlkType *blk, int master_id) { if (!blk->isTouched) { tagsInUse++; @@ -185,16 +173,28 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src) } } + // If we're replacing a block that was previously valid update + // stats for it. This can't be done in findVictim() because a + // found block might not actually be replaced there if the + // coherence protocol says it can't be. + if (blk->isValid()) { + replacements[0]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + + // deal with evicted block + assert(blk->srcMasterId < cache->system->maxMasters()); + occupancies[blk->srcMasterId]--; + } + // Set tag for new block. Caller is responsible for setting status. blk->tag = extractTag(addr); // deal with what we are bringing in - if (context_src != -1) { - occupancies[context_src % cache->numCpus()]++; - } else { - occupancies[cache->numCpus()]++; - } - blk->contextSrc = context_src; + assert(master_id < cache->system->maxMasters()); + occupancies[master_id]++; + blk->srcMasterId = master_id; unsigned set = extractSet(addr); sets[set].moveToHead(blk); @@ -204,16 +204,15 @@ void LRU::invalidateBlk(BlkType *blk) { if (blk) { + if (blk->isValid()) { + tagsInUse--; + assert(blk->srcMasterId < cache->system->maxMasters()); + occupancies[blk->srcMasterId]--; + blk->srcMasterId = Request::invldMasterId; + } blk->status = 0; blk->isTouched = false; blk->clearLoadLocks(); - tagsInUse--; - if (blk->contextSrc != -1) { - occupancies[blk->contextSrc % cache->numCpus()]--; - blk->contextSrc = -1; - } else { - occupancies[cache->numCpus()]--; - } } } diff --git a/src/mem/request.hh b/src/mem/request.hh index b6128f450..68ef0540a 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -40,6 +40,7 @@ #define __MEM_REQUEST_HH__ #include <cassert> +#include <climits> #include "base/fast_alloc.hh" #include "base/flags.hh" @@ -111,6 +112,10 @@ class Request : public FastAlloc static const MasterID funcMasterId = 
1; /** This request id is used for message signaled interrupts */ static const MasterID intMasterId = 2; + /** Invalid request id for assertion checking only. It is invalid behavior + * to ever send this id as part of a request. + * @todo C++1x replace with numeric_limits when constexpr is added */ + static const MasterID invldMasterId = USHRT_MAX; /** @} */ private: diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index c1358eecd..edb18f39a 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -64,7 +64,6 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(), system.toL2Bus = Bus(clock="500GHz", width=16) system.l2c = L2(size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port -system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py index 9436cf88a..95323c2f6 100644 --- a/tests/configs/o3-timing-mp.py +++ b/tests/configs/o3-timing-mp.py @@ -63,7 +63,6 @@ Bus()) system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port -system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/realview-o3-dual.py b/tests/configs/realview-o3-dual.py index adab96fcb..42532065b 100644 --- a/tests/configs/realview-o3-dual.py +++ b/tests/configs/realview-o3-dual.py @@ -83,7 +83,6 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port -system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/realview-simple-timing-dual.py b/tests/configs/realview-simple-timing-dual.py index 81646f825..95daa81b6 100644 --- a/tests/configs/realview-simple-timing-dual.py +++ b/tests/configs/realview-simple-timing-dual.py @@ -83,7 +83,6 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side 
= system.toL2Bus.port system.l2c.mem_side = system.membus.port -system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py index db0c0b9c0..2fa7edb2a 100644 --- a/tests/configs/simple-atomic-mp.py +++ b/tests/configs/simple-atomic-mp.py @@ -62,7 +62,6 @@ Bus()) system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port -system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py index c82ef0a26..06d535154 100644 --- a/tests/configs/simple-timing-mp.py +++ b/tests/configs/simple-timing-mp.py @@ -62,7 +62,6 @@ Bus()) system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port -system.l2c.num_cpus = nb_cores # connect l2c to membus system.l2c.mem_side = system.membus.port diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py index 1680be166..1acfc903b 100644 --- a/tests/configs/tsunami-o3-dual.py +++ b/tests/configs/tsunami-o3-dual.py @@ -85,7 +85,6 @@ system.iocache.mem_side = system.membus.port system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port -system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index 9a29f5c65..ddc7dd1d7 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -83,7 +83,6 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port -system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index 
6b78b71f4..48740ea15 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -83,7 +83,6 @@ system.toL2Bus = Bus() system.l2c = L2(size='4MB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port system.l2c.mem_side = system.membus.port -system.l2c.num_cpus = 2 #connect up the cpu and l1s for c in cpus: