mem: per-thread cache occupancy and per-block ages

This patch enables tracking of cache occupancy per thread, along with
the ages (in buckets) of individual cache blocks.  Cache occupancy stats
are recalculated on each stat dump.
This commit is contained in:
Dam Sunwoo 2014-01-24 15:29:30 -06:00
parent 739c6df94e
commit 85e8779de7
15 changed files with 149 additions and 3 deletions

View file

@ -308,6 +308,7 @@ TableWalker::processWalk()
f = currState->fault;
} else {
RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId);
req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataStatic((uint8_t*)&currState->l1Desc.data);
port.sendFunctional(pkt);
@ -653,6 +654,7 @@ TableWalker::doL1Descriptor()
} else {
RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0,
masterId);
req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataStatic((uint8_t*)&currState->l2Desc.data);
port.sendFunctional(pkt);

View file

@ -54,6 +54,7 @@
#include "base/inifile.hh"
#include "base/str.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/Checkpoint.hh"
#include "debug/TLB.hh"
@ -477,6 +478,8 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode,
if (is_priv)
req->setFlags(Request::PRIVILEGED);
req->taskId(tc->getCpuPtr()->taskId());
DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n",
isPriv, flags & UserMode);
// If this is a clrex instruction, provide a PA of 0 with no fault

View file

@ -890,6 +890,8 @@ BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
thread->contextId(), threadNumber);
req->taskId(cpu->taskId());
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
@ -953,6 +955,8 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
thread->contextId(), threadNumber);
req->taskId(cpu->taskId());
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);

View file

@ -604,6 +604,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Request::INST_FETCH, cpu->instMasterId(), pc,
cpu->thread[tid]->contextId(), tid);
mem_req->taskId(cpu->taskId());
memReq[tid] = mem_req;
// Initiate translation of the icache block

View file

@ -301,6 +301,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
dcache_latency = 0;
req->taskId(taskId());
while (1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
@ -387,6 +388,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
dcache_latency = 0;
req->taskId(taskId());
while(1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
@ -492,6 +494,7 @@ AtomicSimpleCPU::tick()
bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
!curMacroStaticInst;
if (needToFetch) {
ifetch_req.taskId(taskId());
setupFetchRequest(&ifetch_req);
fault = thread->itb->translateAtomic(&ifetch_req, tc,
BaseTLB::Execute);

View file

@ -415,6 +415,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
req->taskId(taskId());
Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
@ -484,6 +486,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
req->taskId(taskId());
Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
@ -561,6 +565,7 @@ TimingSimpleCPU::fetch()
if (needToFetch) {
_status = BaseSimpleCPU::Running;
Request *ifetch_req = new Request();
ifetch_req->taskId(taskId());
ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
setupFetchRequest(ifetch_req);
DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());

View file

@ -166,6 +166,7 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
!gen.done(); gen.next()) {
Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
req->taskId(ContextSwitchTaskId::DMA);
PacketPtr pkt = new Packet(req, cmd);
// Increment the data pointer on a write

12
src/mem/cache/blk.hh vendored
View file

@ -80,6 +80,9 @@ enum CacheBlkStatusBits {
class CacheBlk
{
public:
/** Task Id associated with this block */
uint32_t task_id;
/** The address space ID of this block. */
int asid;
/** Data block tag value. */
@ -119,6 +122,8 @@ class CacheBlk
/** holds the source requestor ID for this block. */
int srcMasterId;
Tick tickInserted;
protected:
/**
* Represents that the indicated thread context has a "lock" on
@ -162,9 +167,11 @@ class CacheBlk
public:
CacheBlk()
: asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
: task_id(ContextSwitchTaskId::Unknown),
asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
set(-1), isTouched(false), refCount(0),
srcMasterId(Request::invldMasterId)
srcMasterId(Request::invldMasterId),
tickInserted(0)
{}
/**
@ -182,6 +189,7 @@ class CacheBlk
whenReady = rhs.whenReady;
set = rhs.set;
refCount = rhs.refCount;
task_id = rhs.task_id;
return *this;
}

View file

@ -1074,6 +1074,11 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
Request *writebackReq =
new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
Request::wbMasterId);
writebackReq->taskId(blk->task_id);
blk->task_id= ContextSwitchTaskId::Unknown;
blk->tickInserted = curTick();
PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback);
if (blk->isWritable()) {
writeback->setSupplyExclusive();
@ -1120,6 +1125,7 @@ Cache<TagStore>::writebackVisitor(BlkType &blk)
Request request(tags->regenerateBlkAddr(blk.tag, blk.set),
blkSize, 0, Request::funcMasterId);
request.taskId(blk.task_id);
Packet packet(&request, MemCmd::WriteReq);
packet.dataStatic(blk.data);

View file

@ -247,6 +247,7 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
// create a prefetch memreq
Request *prefetchReq = new Request(*addrIter, blkSize, 0, masterId);
prefetchReq->taskId(ContextSwitchTaskId::Prefetcher);
PacketPtr prefetch =
new Packet(prefetchReq, MemCmd::HardPFReq);
prefetch->allocate();

View file

@ -125,5 +125,28 @@ BaseTags::regStats()
avgOccs = occupancies / Stats::constant(numBlocks);
occupanciesTaskId
.init(ContextSwitchTaskId::NumTaskId)
.name(name() + ".occ_task_id_blocks")
.desc("Occupied blocks per task id")
.flags(nozero | nonan)
;
// Two-dimensional stat: first index is the task id, second index is the
// block-age bucket (5 buckets, filled in by computeStats()).
ageTaskId
    .init(ContextSwitchTaskId::NumTaskId, 5)
    .name(name() + ".age_task_id_blocks")
    .desc("Occupied blocks per task id, per block age")
    .flags(nozero | nonan)
    ;
percentOccsTaskId
.name(name() + ".occ_task_id_percent")
.desc("Percentage of cache occupancy per task id")
.flags(nozero)
;
percentOccsTaskId = occupanciesTaskId / Stats::constant(numBlocks);
registerDumpCallback(new BaseTagsDumpCallback(this));
registerExitCallback(new BaseTagsCallback(this));
}

View file

@ -121,6 +121,15 @@ class BaseTags : public ClockedObject
/** Average occ % of each requestor using the cache */
Stats::Formula avgOccs;
/** Occupancy of each context/cpu using the cache */
Stats::Vector occupanciesTaskId;
/** Number of blocks held by each task id, split into block-age buckets */
Stats::Vector2d ageTaskId;
/** Occ % of each context/cpu using the cache */
Stats::Formula percentOccsTaskId;
/**
* @}
*/
@ -151,6 +160,11 @@ class BaseTags : public ClockedObject
*/
virtual void cleanupRefs() {}
/**
 * Computes stats just prior to dump event.
 * Invoked by BaseTagsDumpCallback::process().  This default
 * implementation does nothing; tag stores whose stats have to be
 * recalculated at dump time override it.
 */
virtual void computeStats() {}
/**
*iterates through all blocks and clears all locks
*Needed to clear all lock tracking at once
@ -171,4 +185,12 @@ class BaseTagsCallback : public Callback
virtual void process() { tags->cleanupRefs(); };
};
class BaseTagsDumpCallback : public Callback
{
BaseTags *tags;
public:
BaseTagsDumpCallback(BaseTags *t) : tags(t) {}
virtual void process() { tags->computeStats(); };
};
#endif //__BASE_TAGS_HH__

View file

@ -176,6 +176,7 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
{
Addr addr = pkt->getAddr();
MasterID master_id = pkt->req->masterId();
uint32_t task_id = pkt->req->taskId();
if (!blk->isTouched) {
tagsInUse++;
blk->isTouched = true;
@ -210,6 +211,8 @@ LRU::insertBlock(PacketPtr pkt, BlkType *blk)
assert(master_id < cache->system->maxMasters());
occupancies[master_id]++;
blk->srcMasterId = master_id;
blk->task_id = task_id;
blk->tickInserted = curTick();
unsigned set = extractSet(addr);
sets[set].moveToHead(blk);
@ -224,6 +227,8 @@ LRU::invalidate(BlkType *blk)
assert(blk->srcMasterId < cache->system->maxMasters());
occupancies[blk->srcMasterId]--;
blk->srcMasterId = Request::invldMasterId;
blk->task_id = ContextSwitchTaskId::Unknown;
blk->tickInserted = curTick();
// should be evicted before valid blocks
unsigned set = blk->set;
@ -270,3 +275,38 @@ LRU::cleanupRefs()
}
}
}
void
LRU::computeStats()
{
for (unsigned i = 0; i < ContextSwitchTaskId::NumTaskId; ++i) {
occupanciesTaskId[i] = 0;
for (unsigned j = 0; j < 5; ++j) {
ageTaskId[i][j] = 0;
}
}
for (unsigned i = 0; i < numSets * assoc; ++i) {
if (blks[i].isValid()) {
assert(blks[i].task_id < ContextSwitchTaskId::NumTaskId);
occupanciesTaskId[blks[i].task_id]++;
Tick age = curTick() - blks[i].tickInserted;
assert(age >= 0);
int age_index;
if (age / SimClock::Int::us < 10) { // <10us
age_index = 0;
} else if (age / SimClock::Int::us < 100) { // <100us
age_index = 1;
} else if (age / SimClock::Int::ms < 1) { // <1ms
age_index = 2;
} else if (age / SimClock::Int::ms < 10) { // <10ms
age_index = 3;
} else
age_index = 4; // >10ms
ageTaskId[blks[i].task_id][age_index]++;
}
}
}

View file

@ -252,6 +252,11 @@ public:
*/
virtual std::string print() const;
/**
* Called prior to dumping stats to compute task occupancy
*/
virtual void computeStats();
/**
* Visit each block in the tag store and apply a visitor to the
* block.

View file

@ -219,6 +219,11 @@ class Request
*/
Tick _time;
/**
* The task id associated with this request
*/
uint32_t _taskId;
/** The address space ID. */
int _asid;
@ -244,7 +249,8 @@ class Request
* default constructor.)
*/
Request()
: translateDelta(0), accessDelta(0), depth(0)
: _taskId(ContextSwitchTaskId::Unknown),
translateDelta(0), accessDelta(0), depth(0)
{}
/**
@ -253,16 +259,19 @@ class Request
* These fields are adequate to perform a request.
*/
Request(Addr paddr, int size, Flags flags, MasterID mid)
    : _taskId(ContextSwitchTaskId::Unknown)
{
    // Task id starts out as Unknown; the remaining fields are filled in
    // by setPhys().
    this->setPhys(paddr, size, flags, mid);
}
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time)
    : _taskId(ContextSwitchTaskId::Unknown)
{
    // Same as the four-argument form, but also forwards the caller's
    // time to setPhys().
    this->setPhys(paddr, size, flags, mid, time);
}
Request(Addr paddr, int size, Flags flags, MasterID mid, Tick time, Addr pc)
: _taskId(ContextSwitchTaskId::Unknown)
{
setPhys(paddr, size, flags, mid, time);
privateFlags.set(VALID_PC);
@ -271,6 +280,7 @@ class Request
Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
int cid, ThreadID tid)
: _taskId(ContextSwitchTaskId::Unknown)
{
setVirt(asid, vaddr, size, flags, mid, pc);
setThreadContext(cid, tid);
@ -477,6 +487,17 @@ class Request
return _masterId;
}
/** Accessor function for the task id associated with this request. */
uint32_t
taskId() const { return _taskId; }
/** Sets the task id associated with this request. */
void
taskId(uint32_t id)
{
    _taskId = id;
}
/** Accessor function for asid.*/
int
getAsid()