Fix up a few statistics problems.

Stats pretty much line up with old code, except:
- a bug in the old code included the L1 latency in the L2 miss time, making the reported L2 miss latency too high
- UniCoherence did cache-to-cache transfers even from non-owner caches,
so occasionally the icache would get a block from the dcache rather than from the L2
- L2 can now receive ReadExReq from L1 since L1s have coherence

--HG--
extra : convert_revision : 5052c1a1767b5a662f30a88f16012165a73b791c
This commit is contained in:
Steve Reinhardt 2007-06-30 13:34:16 -07:00
parent 6ab53415ef
commit 6babda7123
8 changed files with 65 additions and 58 deletions

View file

@ -150,20 +150,29 @@ BaseCache::regStats()
;
}
// These macros make it easier to sum the right subset of commands and
// to change the subset of commands that are considered "demand" vs
// "non-demand"
#define SUM_DEMAND(s) \
(s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::ReadExReq])
// should writebacks be included here? prior code was inconsistent...
#define SUM_NON_DEMAND(s) \
(s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq])
demandHits
.name(name() + ".demand_hits")
.desc("number of demand (read+write) hits")
.flags(total)
;
demandHits = hits[MemCmd::ReadReq] + hits[MemCmd::WriteReq];
demandHits = SUM_DEMAND(hits);
overallHits
.name(name() + ".overall_hits")
.desc("number of overall hits")
.flags(total)
;
overallHits = demandHits + hits[MemCmd::SoftPFReq] + hits[MemCmd::HardPFReq]
+ hits[MemCmd::Writeback];
overallHits = demandHits + SUM_NON_DEMAND(hits);
// Miss statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -183,15 +192,14 @@ BaseCache::regStats()
.desc("number of demand (read+write) misses")
.flags(total)
;
demandMisses = misses[MemCmd::ReadReq] + misses[MemCmd::WriteReq];
demandMisses = SUM_DEMAND(misses);
overallMisses
.name(name() + ".overall_misses")
.desc("number of overall misses")
.flags(total)
;
overallMisses = demandMisses + misses[MemCmd::SoftPFReq] +
misses[MemCmd::HardPFReq] + misses[MemCmd::Writeback];
overallMisses = demandMisses + SUM_NON_DEMAND(misses);
// Miss latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -211,15 +219,14 @@ BaseCache::regStats()
.desc("number of demand (read+write) miss cycles")
.flags(total)
;
demandMissLatency = missLatency[MemCmd::ReadReq] + missLatency[MemCmd::WriteReq];
demandMissLatency = SUM_DEMAND(missLatency);
overallMissLatency
.name(name() + ".overall_miss_latency")
.desc("number of overall miss cycles")
.flags(total)
;
overallMissLatency = demandMissLatency + missLatency[MemCmd::SoftPFReq] +
missLatency[MemCmd::HardPFReq];
overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
// access formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -368,15 +375,14 @@ BaseCache::regStats()
.desc("number of demand (read+write) MSHR hits")
.flags(total)
;
demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
demandMshrHits = SUM_DEMAND(mshr_hits);
overallMshrHits
.name(name() + ".overall_mshr_hits")
.desc("number of overall MSHR hits")
.flags(total)
;
overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
mshr_hits[MemCmd::HardPFReq];
overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
// MSHR miss statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -396,15 +402,14 @@ BaseCache::regStats()
.desc("number of demand (read+write) MSHR misses")
.flags(total)
;
demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
demandMshrMisses = SUM_DEMAND(mshr_misses);
overallMshrMisses
.name(name() + ".overall_mshr_misses")
.desc("number of overall MSHR misses")
.flags(total)
;
overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
mshr_misses[MemCmd::HardPFReq];
overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
// MSHR miss latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -424,16 +429,15 @@ BaseCache::regStats()
.desc("number of demand (read+write) MSHR miss cycles")
.flags(total)
;
demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
+ mshr_miss_latency[MemCmd::WriteReq];
demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
overallMshrMissLatency
.name(name() + ".overall_mshr_miss_latency")
.desc("number of overall MSHR miss cycles")
.flags(total)
;
overallMshrMissLatency = demandMshrMissLatency +
mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
overallMshrMissLatency =
demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
// MSHR uncacheable statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -453,9 +457,8 @@ BaseCache::regStats()
.desc("number of overall MSHR uncacheable misses")
.flags(total)
;
overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
+ mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
+ mshr_uncacheable[MemCmd::HardPFReq];
overallMshrUncacheable =
SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
// MSHR uncacheable latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@ -475,10 +478,9 @@ BaseCache::regStats()
.desc("number of overall MSHR uncacheable cycles")
.flags(total)
;
overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
+ mshr_uncacheable_lat[MemCmd::WriteReq]
+ mshr_uncacheable_lat[MemCmd::SoftPFReq]
+ mshr_uncacheable_lat[MemCmd::HardPFReq];
overallMshrUncacheableLatency =
SUM_DEMAND(mshr_uncacheable_lat) +
SUM_NON_DEMAND(mshr_uncacheable_lat);
#if 0
// MSHR access formulas

View file

@ -476,10 +476,10 @@ class BaseCache : public MemObject
}
}
Tick nextMSHRReadyTick()
Tick nextMSHRReadyTime()
{
return std::min(mshrQueue.nextMSHRReadyTick(),
writeBuffer.nextMSHRReadyTick());
return std::min(mshrQueue.nextMSHRReadyTime(),
writeBuffer.nextMSHRReadyTime());
}
/**

View file

@ -615,7 +615,7 @@ Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
if (!target->pkt->req->isUncacheable()) {
missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
completion_time - target->time;
completion_time - target->recvTime;
}
target->pkt->makeTimingResponse();
cpuSidePort->respond(target->pkt, completion_time);
@ -668,11 +668,14 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
// Can we deallocate MSHR when done?
bool deallocate = false;
// Initial target is used just for stats
MSHR::Target *initial_tgt = mshr->getTarget();
int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
Tick miss_latency = curTick - initial_tgt->recvTime;
if (mshr->isCacheFill) {
#if 0
mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
curTick - pkt->time;
#endif
mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
miss_latency;
DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
pkt->getAddr());
BlkType *blk = tags->findBlock(pkt->getAddr());
@ -698,8 +701,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
}
} else {
if (pkt->req->isUncacheable()) {
mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
curTick - pkt->time;
mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
miss_latency;
}
while (mshr->hasTargets()) {
@ -1262,8 +1265,8 @@ Cache<TagStore>::MemSidePort::sendPacket()
// tried to send packet... if it was successful (no retry), see if
// we need to rerequest bus or not
if (!waitingOnRetry) {
Tick nextReady = std::min(deferredPacketReadyTick(),
myCache()->nextMSHRReadyTick());
Tick nextReady = std::min(deferredPacketReadyTime(),
myCache()->nextMSHRReadyTime());
// @TODO: need to factor in prefetch requests here somehow
if (nextReady != MaxTick) {
DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);

View file

@ -56,11 +56,11 @@ MSHR::MSHR()
void
MSHR::allocate(Addr _addr, int _size, PacketPtr target,
Tick when, Counter _order)
Tick whenReady, Counter _order)
{
addr = _addr;
size = _size;
readyTick = when;
readyTime = whenReady;
order = _order;
assert(target);
isCacheFill = false;
@ -71,7 +71,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
ntargets = 1;
// Don't know of a case where we would allocate a new MSHR for a
// snoop (mem-side request), so set cpuSide to true here.
targets.push_back(Target(target, when, _order, true));
targets.push_back(Target(target, whenReady, _order, true));
assert(deferredTargets.empty());
deferredNeedsExclusive = false;
pendingInvalidate = false;
@ -94,33 +94,33 @@ MSHR::deallocate()
* Adds a target to an MSHR
*/
void
MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
{
if (inService) {
if (!deferredTargets.empty() || pendingInvalidate ||
(!needsExclusive && target->needsExclusive())) {
// need to put on deferred list
deferredTargets.push_back(Target(target, when, _order, true));
deferredTargets.push_back(Target(target, whenReady, _order, true));
if (target->needsExclusive()) {
deferredNeedsExclusive = true;
}
} else {
// still OK to append to outstanding request
targets.push_back(Target(target, when, _order, true));
targets.push_back(Target(target, whenReady, _order, true));
}
} else {
if (target->needsExclusive()) {
needsExclusive = true;
}
targets.push_back(Target(target, when, _order, true));
targets.push_back(Target(target, whenReady, _order, true));
}
++ntargets;
}
void
MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
{
assert(inService); // don't bother to call otherwise
@ -137,7 +137,7 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
if (needsExclusive || pkt->needsExclusive()) {
// actual target device (typ. PhysicalMemory) will delete the
// packet on reception, so we need to save a copy here
targets.push_back(Target(new Packet(pkt), when, _order, false));
targets.push_back(Target(new Packet(pkt), whenReady, _order, false));
++ntargets;
if (needsExclusive) {
@ -177,7 +177,7 @@ MSHR::promoteDeferredTargets()
pendingShared = false;
deferredNeedsExclusive = false;
order = targets.front().order;
readyTick = std::max(curTick, targets.front().time);
readyTime = std::max(curTick, targets.front().readyTime);
return true;
}

View file

@ -54,15 +54,17 @@ class MSHR : public Packet::SenderState
class Target {
public:
Tick time; //!< Time when request was received (for stats)
Tick recvTime; //!< Time when request was received (for stats)
Tick readyTime; //!< Time when request is ready to be serviced
Counter order; //!< Global order (for memory consistency mgmt)
PacketPtr pkt; //!< Pending request packet.
bool cpuSide; //!< Did request come from cpu side or mem side?
bool isCpuSide() { return cpuSide; }
Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide)
: time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide)
Target(PacketPtr _pkt, Tick _readyTime, Counter _order, bool _cpuSide)
: recvTime(curTick), readyTime(_readyTime), order(_order),
pkt(_pkt), cpuSide(_cpuSide)
{}
};
@ -81,7 +83,7 @@ class MSHR : public Packet::SenderState
MSHRQueue *queue;
/** Cycle when ready to issue */
Tick readyTick;
Tick readyTime;
/** Order number assigned by the miss queue. */
Counter order;

View file

@ -111,14 +111,14 @@ MSHRQueue::findPending(Addr addr, int size) const
MSHR::Iterator
MSHRQueue::addToReadyList(MSHR *mshr)
{
if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) {
if (readyList.empty() || readyList.back()->readyTime <= mshr->readyTime) {
return readyList.insert(readyList.end(), mshr);
}
MSHR::Iterator i = readyList.begin();
MSHR::Iterator end = readyList.end();
for (; i != end; ++i) {
if ((*i)->readyTick > mshr->readyTick) {
if ((*i)->readyTime > mshr->readyTime) {
return readyList.insert(i, mshr);
}
}

View file

@ -193,15 +193,15 @@ class MSHRQueue
*/
MSHR *getNextMSHR() const
{
if (readyList.empty() || readyList.front()->readyTick > curTick) {
if (readyList.empty() || readyList.front()->readyTime > curTick) {
return NULL;
}
return readyList.front();
}
Tick nextMSHRReadyTick() const
Tick nextMSHRReadyTime() const
{
return readyList.empty() ? MaxTick : readyList.front()->readyTick;
return readyList.empty() ? MaxTick : readyList.front()->readyTime;
}
};

View file

@ -105,7 +105,7 @@ class SimpleTimingPort : public Port
bool deferredPacketReady()
{ return !transmitList.empty() && transmitList.front().tick <= curTick; }
Tick deferredPacketReadyTick()
Tick deferredPacketReadyTime()
{ return transmitList.empty() ? MaxTick : transmitList.front().tick; }
void schedSendEvent(Tick when)