mem: per-thread cache occupancy and per-block ages
This patch enables tracking of cache occupancy per thread (task id), along with the age of each cache block, grouped into buckets. The occupancy and age statistics are recalculated from the current cache contents on each stat dump.
This commit is contained in:
parent
739c6df94e
commit
85e8779de7
15 changed files with 149 additions and 3 deletions
|
@ -308,6 +308,7 @@ TableWalker::processWalk()
|
|||
f = currState->fault;
|
||||
} else {
|
||||
RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId);
|
||||
req->taskId(ContextSwitchTaskId::DMA);
|
||||
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
|
||||
pkt->dataStatic((uint8_t*)&currState->l1Desc.data);
|
||||
port.sendFunctional(pkt);
|
||||
|
@ -653,6 +654,7 @@ TableWalker::doL1Descriptor()
|
|||
} else {
|
||||
RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0,
|
||||
masterId);
|
||||
req->taskId(ContextSwitchTaskId::DMA);
|
||||
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
|
||||
pkt->dataStatic((uint8_t*)&currState->l2Desc.data);
|
||||
port.sendFunctional(pkt);
|
||||
|
|
|
@ -54,6 +54,7 @@
|
|||
#include "base/inifile.hh"
|
||||
#include "base/str.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "debug/Checkpoint.hh"
|
||||
#include "debug/TLB.hh"
|
||||
|
@ -477,6 +478,8 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
|
|||
if (is_priv)
|
||||
req->setFlags(Request::PRIVILEGED);
|
||||
|
||||
req->taskId(tc->getCpuPtr()->taskId());
|
||||
|
||||
DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n",
|
||||
isPriv, flags & UserMode);
|
||||
// If this is a clrex instruction, provide a PA of 0 with no fault
|
||||
|
|
|
@ -890,6 +890,8 @@ BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
|
|||
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
|
||||
thread->contextId(), threadNumber);
|
||||
|
||||
req->taskId(cpu->taskId());
|
||||
|
||||
// Only split the request if the ISA supports unaligned accesses.
|
||||
if (TheISA::HasUnalignedMemAcc) {
|
||||
splitRequest(req, sreqLow, sreqHigh);
|
||||
|
@ -953,6 +955,8 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
|
|||
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
|
||||
thread->contextId(), threadNumber);
|
||||
|
||||
req->taskId(cpu->taskId());
|
||||
|
||||
// Only split the request if the ISA supports unaligned accesses.
|
||||
if (TheISA::HasUnalignedMemAcc) {
|
||||
splitRequest(req, sreqLow, sreqHigh);
|
||||
|
|
|
@ -604,6 +604,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
|
|||
Request::INST_FETCH, cpu->instMasterId(), pc,
|
||||
cpu->thread[tid]->contextId(), tid);
|
||||
|
||||
mem_req->taskId(cpu->taskId());
|
||||
|
||||
memReq[tid] = mem_req;
|
||||
|
||||
// Initiate translation of the icache block
|
||||
|
|
|
@ -301,6 +301,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
|
|||
|
||||
dcache_latency = 0;
|
||||
|
||||
req->taskId(taskId());
|
||||
while (1) {
|
||||
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
|
||||
|
||||
|
@ -387,6 +388,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
|
|||
|
||||
dcache_latency = 0;
|
||||
|
||||
req->taskId(taskId());
|
||||
while(1) {
|
||||
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
|
||||
|
||||
|
@ -492,6 +494,7 @@ AtomicSimpleCPU::tick()
|
|||
bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
|
||||
!curMacroStaticInst;
|
||||
if (needToFetch) {
|
||||
ifetch_req.taskId(taskId());
|
||||
setupFetchRequest(&ifetch_req);
|
||||
fault = thread->itb->translateAtomic(&ifetch_req, tc,
|
||||
BaseTLB::Execute);
|
||||
|
|
|
@ -415,6 +415,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
|
|||
RequestPtr req = new Request(asid, addr, size,
|
||||
flags, dataMasterId(), pc, _cpuId, tid);
|
||||
|
||||
req->taskId(taskId());
|
||||
|
||||
Addr split_addr = roundDown(addr + size - 1, block_size);
|
||||
assert(split_addr <= addr || split_addr - addr < block_size);
|
||||
|
||||
|
@ -484,6 +486,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
|
|||
RequestPtr req = new Request(asid, addr, size,
|
||||
flags, dataMasterId(), pc, _cpuId, tid);
|
||||
|
||||
req->taskId(taskId());
|
||||
|
||||
Addr split_addr = roundDown(addr + size - 1, block_size);
|
||||
assert(split_addr <= addr || split_addr - addr < block_size);
|
||||
|
||||
|
@ -561,6 +565,7 @@ TimingSimpleCPU::fetch()
|
|||
if (needToFetch) {
|
||||
_status = BaseSimpleCPU::Running;
|
||||
Request *ifetch_req = new Request();
|
||||
ifetch_req->taskId(taskId());
|
||||
ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
|
||||
setupFetchRequest(ifetch_req);
|
||||
DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());
|
||||
|
|
|
@ -166,6 +166,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
|
|||
for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
|
||||
!gen.done(); gen.next()) {
|
||||
Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
|
||||
req->taskId(ContextSwitchTaskId::DMA);
|
||||
PacketPtr pkt = new Packet(req, cmd);
|
||||
|
||||
// Increment the data pointer on a write
|
||||
|
|
12
src/mem/cache/blk.hh
vendored
12
src/mem/cache/blk.hh
vendored
|
@ -80,6 +80,9 @@ enum CacheBlkStatusBits {
|
|||
class CacheBlk
|
||||
{
|
||||
public:
|
||||
/** Task Id associated with this block */
|
||||
uint32_t task_id;
|
||||
|
||||
/** The address space ID of this block. */
|
||||
int asid;
|
||||
/** Data block tag value. */
|
||||
|
@ -119,6 +122,8 @@ class CacheBlk
|
|||
/** holds the source requestor ID for this block. */
|
||||
int srcMasterId;
|
||||
|
||||
Tick tickInserted;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Represents that the indicated thread context has a "lock" on
|
||||
|
@ -162,9 +167,11 @@ class CacheBlk
|
|||
public:
|
||||
|
||||
CacheBlk()
|
||||
: asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
|
||||
: task_id(ContextSwitchTaskId::Unknown),
|
||||
asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
|
||||
set(-1), isTouched(false), refCount(0),
|
||||
srcMasterId(Request::invldMasterId)
|
||||
srcMasterId(Request::invldMasterId),
|
||||
tickInserted(0)
|
||||
{}
|
||||
|
||||
/**
|
||||
|
@ -182,6 +189,7 @@ class CacheBlk
|
|||
whenReady = rhs.whenReady;
|
||||
set = rhs.set;
|
||||
refCount = rhs.refCount;
|
||||
task_id = rhs.task_id;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
6
src/mem/cache/cache_impl.hh
vendored
6
src/mem/cache/cache_impl.hh
vendored
|
@ -1074,6 +1074,11 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
|
|||
Request *writebackReq =
|
||||
new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
|
||||
Request::wbMasterId);
|
||||
|
||||
writebackReq->taskId(blk->task_id);
|
||||
blk->task_id= ContextSwitchTaskId::Unknown;
|
||||
blk->tickInserted = curTick();
|
||||
|
||||
PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback);
|
||||
if (blk->isWritable()) {
|
||||
writeback->setSupplyExclusive();
|
||||
|
@ -1120,6 +1125,7 @@ Cache<TagStore>::writebackVisitor(BlkType &blk)
|
|||
|
||||
Request request(tags->regenerateBlkAddr(blk.tag, blk.set),
|
||||
blkSize, 0, Request::funcMasterId);
|
||||
request.taskId(blk.task_id);
|
||||
|
||||
Packet packet(&request, MemCmd::WriteReq);
|
||||
packet.dataStatic(blk.data);
|
||||
|
|
1
src/mem/cache/prefetch/base.cc
vendored
1
src/mem/cache/prefetch/base.cc
vendored
|
@ -247,6 +247,7 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
|
|||
|
||||
// create a prefetch memreq
|
||||
Request *prefetchReq = new Request(*addrIter, blkSize, 0, masterId);
|
||||
prefetchReq->taskId(ContextSwitchTaskId::Prefetcher);
|
||||
PacketPtr prefetch =
|
||||
new Packet(prefetchReq, MemCmd::HardPFReq);
|
||||
prefetch->allocate();
|
||||
|
|
23
src/mem/cache/tags/base.cc
vendored
23
src/mem/cache/tags/base.cc
vendored
|
@ -125,5 +125,28 @@ BaseTags::regStats()
|
|||
|
||||
avgOccs = occupancies / Stats::constant(numBlocks);
|
||||
|
||||
occupanciesTaskId
|
||||
.init(ContextSwitchTaskId::NumTaskId)
|
||||
.name(name() + ".occ_task_id_blocks")
|
||||
.desc("Occupied blocks per task id")
|
||||
.flags(nozero | nonan)
|
||||
;
|
||||
|
||||
ageTaskId
|
||||
.init(ContextSwitchTaskId::NumTaskId, 5)
|
||||
.name(name() + ".age_task_id_blocks")
|
||||
.desc("Occupied blocks per task id")
|
||||
.flags(nozero | nonan)
|
||||
;
|
||||
|
||||
percentOccsTaskId
|
||||
.name(name() + ".occ_task_id_percent")
|
||||
.desc("Percentage of cache occupancy per task id")
|
||||
.flags(nozero)
|
||||
;
|
||||
|
||||
percentOccsTaskId = occupanciesTaskId / Stats::constant(numBlocks);
|
||||
|
||||
registerDumpCallback(new BaseTagsDumpCallback(this));
|
||||
registerExitCallback(new BaseTagsCallback(this));
|
||||
}
|
||||
|
|
22
src/mem/cache/tags/base.hh
vendored
22
src/mem/cache/tags/base.hh
vendored
|
@ -121,6 +121,15 @@ class BaseTags : public ClockedObject
|
|||
/** Average occ % of each requestor using the cache */
|
||||
Stats::Formula avgOccs;
|
||||
|
||||
/** Occupancy of each context/cpu using the cache */
|
||||
Stats::Vector occupanciesTaskId;
|
||||
|
||||
/** Occupied blocks per task id, broken down by block-age bucket */
|
||||
Stats::Vector2d ageTaskId;
|
||||
|
||||
/** Occ % of each context/cpu using the cache */
|
||||
Stats::Formula percentOccsTaskId;
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
@ -151,6 +160,11 @@ class BaseTags : public ClockedObject
|
|||
*/
|
||||
virtual void cleanupRefs() {}
|
||||
|
||||
/**
|
||||
* Computes stats just prior to dump event
|
||||
*/
|
||||
virtual void computeStats() {}
|
||||
|
||||
/**
|
||||
* Iterate through all blocks and clear all locks.
|
||||
*Needed to clear all lock tracking at once
|
||||
|
@ -171,4 +185,12 @@ class BaseTagsCallback : public Callback
|
|||
virtual void process() { tags->cleanupRefs(); };
|
||||
};
|
||||
|
||||
class BaseTagsDumpCallback : public Callback
|
||||
{
|
||||
BaseTags *tags;
|
||||
public:
|
||||
BaseTagsDumpCallback(BaseTags *t) : tags(t) {}
|
||||
virtual void process() { tags->computeStats(); };
|
||||
};
|
||||
|
||||
#endif //__BASE_TAGS_HH__
|
||||
|
|
40
src/mem/cache/tags/lru.cc
vendored
40
src/mem/cache/tags/lru.cc
vendored
|
@ -176,6 +176,7 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
|
|||
{
|
||||
Addr addr = pkt->getAddr();
|
||||
MasterID master_id = pkt->req->masterId();
|
||||
uint32_t task_id = pkt->req->taskId();
|
||||
if (!blk->isTouched) {
|
||||
tagsInUse++;
|
||||
blk->isTouched = true;
|
||||
|
@ -210,6 +211,8 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
|
|||
assert(master_id < cache->system->maxMasters());
|
||||
occupancies[master_id]++;
|
||||
blk->srcMasterId = master_id;
|
||||
blk->task_id = task_id;
|
||||
blk->tickInserted = curTick();
|
||||
|
||||
unsigned set = extractSet(addr);
|
||||
sets[set].moveToHead(blk);
|
||||
|
@ -224,6 +227,8 @@ LRU::invalidate(BlkType *blk)
|
|||
assert(blk->srcMasterId < cache->system->maxMasters());
|
||||
occupancies[blk->srcMasterId]--;
|
||||
blk->srcMasterId = Request::invldMasterId;
|
||||
blk->task_id = ContextSwitchTaskId::Unknown;
|
||||
blk->tickInserted = curTick();
|
||||
|
||||
// should be evicted before valid blocks
|
||||
unsigned set = blk->set;
|
||||
|
@ -270,3 +275,38 @@ LRU::cleanupRefs()
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LRU::computeStats()
|
||||
{
|
||||
for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) {
|
||||
occupanciesTaskId[i] = 0;
|
||||
for (unsigned j = 0; j < 5; ++j) {
|
||||
ageTaskId[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < numSets * assoc; ++i) {
|
||||
if (blks[i].isValid()) {
|
||||
assert(blks[i].task_id < ContextSwitchTaskId::NumTaskId);
|
||||
occupanciesTaskId[blks[i].task_id]++;
|
||||
Tick age = curTick() - blks[i].tickInserted;
|
||||
assert(age >= 0);
|
||||
|
||||
int age_index;
|
||||
if (age / SimClock::Int::us < 10) { // <10us
|
||||
age_index = 0;
|
||||
} else if (age / SimClock::Int::us < 100) { // <100us
|
||||
age_index = 1;
|
||||
} else if (age / SimClock::Int::ms < 1) { // <1ms
|
||||
age_index = 2;
|
||||
} else if (age / SimClock::Int::ms < 10) { // <10ms
|
||||
age_index = 3;
|
||||
} else
|
||||
age_index = 4; // >10ms
|
||||
|
||||
ageTaskId[blks[i].task_id][age_index]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
5
src/mem/cache/tags/lru.hh
vendored
5
src/mem/cache/tags/lru.hh
vendored
|
@ -252,6 +252,11 @@ public:
|
|||
*/
|
||||
virtual std::string print() const;
|
||||
|
||||
/**
|
||||
* Called prior to dumping stats to compute task occupancy
|
||||
*/
|
||||
virtual void computeStats();
|
||||
|
||||
/**
|
||||
* Visit each block in the tag store and apply a visitor to the
|
||||
* block.
|
||||
|
|
|
@ -219,6 +219,11 @@ class Request
|
|||
*/
|
||||
Tick _time;
|
||||
|
||||
/**
|
||||
* The task id associated with this request
|
||||
*/
|
||||
uint32_t _taskId;
|
||||
|
||||
/** The address space ID. */
|
||||
int _asid;
|
||||
|
||||
|
@ -244,7 +249,8 @@ class Request
|
|||
* default constructor.)
|
||||
*/
|
||||
Request()
|
||||
: translateDelta(0), accessDelta(0), depth(0)
|
||||
: _taskId(ContextSwitchTaskId::Unknown),
|
||||
translateDelta(0), accessDelta(0), depth(0)
|
||||
{}
|
||||
|
||||
/**
|
||||
|
@ -253,16 +259,19 @@ class Request
|
|||
* These fields are adequate to perform a request.
|
||||
*/
|
||||
Request(Addr paddr, int size, Flags flags, MasterID mid)
    : _taskId(ContextSwitchTaskId::Unknown)
{
    // The request starts with an Unknown task id; it can be tagged later
    // through taskId(id).
    this->setPhys(paddr, size, flags, mid);
}
|
||||
|
||||
/**
 * Build a physically addressed request, forwarding an explicit time to
 * setPhys(). The task id starts out as ContextSwitchTaskId::Unknown.
 */
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time)
    : _taskId(ContextSwitchTaskId::Unknown)
{
    this->setPhys(paddr, size, flags, mid, time);
}
|
||||
|
||||
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time, Addr pc)
|
||||
: _taskId(ContextSwitchTaskId::Unknown)
|
||||
{
|
||||
setPhys(paddr, size, flags, mid, time);
|
||||
privateFlags.set(VALID_PC);
|
||||
|
@ -271,6 +280,7 @@ class Request
|
|||
|
||||
Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
|
||||
int cid, ThreadID tid)
|
||||
: _taskId(ContextSwitchTaskId::Unknown)
|
||||
{
|
||||
setVirt(asid, vaddr, size, flags, mid, pc);
|
||||
setThreadContext(cid, tid);
|
||||
|
@ -477,6 +487,17 @@ class Request
|
|||
return _masterId;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
taskId() const
|
||||
{
|
||||
return _taskId;
|
||||
}
|
||||
|
||||
void
|
||||
taskId(uint32_t id) {
|
||||
_taskId = id;
|
||||
}
|
||||
|
||||
/** Accessor function for asid.*/
|
||||
int
|
||||
getAsid()
|
||||
|
|
Loading…
Reference in a new issue