mem: track per-request latencies and access depths in the cache hierarchy
Add some values and methods to the request object to track the translation and access latency for a request and which level of the cache hierarchy responded to the request.
This commit is contained in:
parent
daa781d2db
commit
ca89eba79e
6 changed files with 55 additions and 3 deletions
|
@ -400,6 +400,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
|
|||
fetchStatus[tid] = IcacheAccessComplete;
|
||||
}
|
||||
|
||||
pkt->req->setAccessLatency();
|
||||
// Reset the mem req to NULL.
|
||||
delete pkt->req;
|
||||
delete pkt;
|
||||
|
|
|
@ -129,6 +129,8 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
|||
delete state->mainPkt->req;
|
||||
delete state->mainPkt;
|
||||
}
|
||||
|
||||
pkt->req->setAccessLatency();
|
||||
delete state;
|
||||
delete pkt->req;
|
||||
delete pkt;
|
||||
|
|
|
@ -646,7 +646,6 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
|
|||
|
||||
// received a response from the icache: execute the received
|
||||
// instruction
|
||||
|
||||
assert(!pkt || !pkt->isError());
|
||||
assert(_status == IcacheWaitResponse);
|
||||
|
||||
|
@ -655,6 +654,10 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
|
|||
numCycles += curCycle() - previousCycle;
|
||||
previousCycle = curCycle();
|
||||
|
||||
if (pkt)
|
||||
pkt->req->setAccessLatency();
|
||||
|
||||
|
||||
preExecute();
|
||||
if (curStaticInst && curStaticInst->isMemRef()) {
|
||||
// load or store: just send to dcache
|
||||
|
@ -749,6 +752,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
|
|||
assert(_status == DcacheWaitResponse || _status == DTBWaitResponse ||
|
||||
pkt->req->getFlags().isSet(Request::NO_ACCESS));
|
||||
|
||||
pkt->req->setAccessLatency();
|
||||
numCycles += curCycle() - previousCycle;
|
||||
previousCycle = curCycle();
|
||||
|
||||
|
|
|
@ -256,6 +256,7 @@ class DataTranslation : public BaseTLB::Translation
|
|||
assert(mode == state->mode);
|
||||
if (state->finish(fault, index)) {
|
||||
xc->finishTranslation(state);
|
||||
req->setTranslateLatency();
|
||||
}
|
||||
delete this;
|
||||
}
|
||||
|
|
2
src/mem/cache/base.hh
vendored
2
src/mem/cache/base.hh
vendored
|
@ -568,7 +568,7 @@ class BaseCache : public MemObject
|
|||
{
|
||||
assert(pkt->req->masterId() < system->maxMasters());
|
||||
misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
|
||||
|
||||
pkt->req->incAccessDepth();
|
||||
if (missCount) {
|
||||
--missCount;
|
||||
if (missCount == 0)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2012 ARM Limited
|
||||
* Copyright (c) 2012-2013 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
@ -244,6 +244,7 @@ class Request
|
|||
* default constructor.)
|
||||
*/
|
||||
Request()
|
||||
: translateDelta(0), accessDelta(0), depth(0)
|
||||
{}
|
||||
|
||||
/**
|
||||
|
@ -304,6 +305,9 @@ class Request
|
|||
_flags.set(flags);
|
||||
privateFlags.clear(~STICKY_PRIVATE_FLAGS);
|
||||
privateFlags.set(VALID_PADDR|VALID_SIZE);
|
||||
depth = 0;
|
||||
accessDelta = 0;
|
||||
//translateDelta = 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -331,6 +335,9 @@ class Request
|
|||
_flags.set(flags);
|
||||
privateFlags.clear(~STICKY_PRIVATE_FLAGS);
|
||||
privateFlags.set(VALID_VADDR|VALID_SIZE|VALID_PC);
|
||||
depth = 0;
|
||||
accessDelta = 0;
|
||||
translateDelta = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -381,6 +388,23 @@ class Request
|
|||
return _paddr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Time for the TLB/table walker to successfully translate this request.
|
||||
*/
|
||||
Tick translateDelta;
|
||||
|
||||
/**
|
||||
* Access latency to complete this memory transaction not including
|
||||
* translation time.
|
||||
*/
|
||||
Tick accessDelta;
|
||||
|
||||
/**
|
||||
* Level of the cache hierarchy where this request was responded to
|
||||
* (e.g. 0 = L1; 1 = L2).
|
||||
*/
|
||||
int depth;
|
||||
|
||||
/**
|
||||
* Accessor for size.
|
||||
*/
|
||||
|
@ -535,6 +559,26 @@ class Request
|
|||
return _pc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment/Get the depth at which this request is responded to.
|
||||
* This currently happens when the request misses in any cache level.
|
||||
*/
|
||||
/** Advance the access depth counter by one level (called on a cache miss). */
void
incAccessDepth()
{
    ++depth;
}
|
||||
/** Read back the access depth counter (documented as 0 = L1, 1 = L2). */
int
getAccessDepth() const
{
    return depth;
}
|
||||
|
||||
/**
|
||||
* Set/Get the time taken for this request to be successfully translated.
|
||||
*/
|
||||
/**
 * Record the translation latency as the number of ticks between this
 * request's _time and the current tick.
 */
void
setTranslateLatency()
{
    translateDelta = (curTick() - _time);
}
|
||||
/** Read back the translation latency stored by setTranslateLatency(). */
Tick
getTranslateLatency() const
{
    return translateDelta;
}
|
||||
|
||||
/**
|
||||
* Set/Get the time taken to complete this request's access, not including
|
||||
* the time to successfully translate the request.
|
||||
*/
|
||||
/**
 * Record the access latency: the ticks elapsed since this request's _time,
 * with the previously recorded translation latency subtracted out.
 */
void
setAccessLatency()
{
    // Same left-to-right evaluation as the unparenthesised expression.
    accessDelta = (curTick() - _time) - translateDelta;
}
|
||||
/** Read back the access latency stored by setAccessLatency(). */
Tick
getAccessLatency() const
{
    return accessDelta;
}
|
||||
|
||||
/** Accessor functions for flags. Note that these are for testing
|
||||
only; setting flags should be done via setFlags(). */
|
||||
/** Test-only query: true when the UNCACHEABLE bit is set in _flags. */
bool
isUncacheable() const
{
    return _flags.isSet(UNCACHEABLE);
}
|
||||
|
|
Loading…
Reference in a new issue