mem: Clarify usage of latency in the cache
This patch adds some much-needed clarity in the specification of the cache timing. For now, hit_latency and response_latency are kept as top-level parameters, but the cache itself has a number of local variables to better map the individual timing variables to different behaviours (and sub-components). The introduced variables are: - lookupLatency: latency of tag lookup, occurring on any access; - forwardLatency: latency that occurs in case of an outbound miss; - fillLatency: latency to fill a cache block. We keep the existing responseLatency. The forwardLatency is used by allocateBufferInternal() for: - MSHR allocateWriteBuffer (uncached write forwarded to WriteBuffer); - MSHR allocateMissBuffer (cacheable miss in MSHR queue); - MSHR allocateUncachedReadBuffer (uncached read allocated in MSHR queue). It is our assumption that the time for the above three buffers is the same. Similarly, for snoop responses passing through the cache we use forwardLatency.
This commit is contained in:
parent
5a573762d0
commit
e2828587b3
9 changed files with 111 additions and 66 deletions
4
src/mem/cache/base.cc
vendored
4
src/mem/cache/base.cc
vendored
|
@ -72,7 +72,9 @@ BaseCache::BaseCache(const Params *p)
|
|||
writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, 0,
|
||||
MSHRQueue_WriteBuffer),
|
||||
blkSize(p->system->cacheLineSize()),
|
||||
hitLatency(p->hit_latency),
|
||||
lookupLatency(p->hit_latency),
|
||||
forwardLatency(p->hit_latency),
|
||||
fillLatency(p->response_latency),
|
||||
responseLatency(p->response_latency),
|
||||
numTarget(p->tgts_per_mshr),
|
||||
forwardSnoops(p->forward_snoops),
|
||||
|
|
35
src/mem/cache/base.hh
vendored
35
src/mem/cache/base.hh
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2013 ARM Limited
|
||||
* Copyright (c) 2012-2013, 2015 ARM Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
@ -202,6 +202,17 @@ class BaseCache : public MemObject
|
|||
/** Write/writeback buffer */
|
||||
MSHRQueue writeBuffer;
|
||||
|
||||
/**
|
||||
* Allocate a buffer, passing the time indicating when to schedule an
|
||||
* event to the queued port to go and ask the MSHR and write queue
|
||||
* if they have packets to send.
|
||||
*
|
||||
* allocateBufferInternal() function is called in:
|
||||
* - MSHR allocateWriteBuffer (uncached write forwarded to WriteBuffer);
|
||||
* - MSHR allocateMissBuffer (cacheable miss in MSHR queue);
|
||||
* - MSHR allocateUncachedReadBuffer (uncached read allocated in MSHR
|
||||
* queue)
|
||||
*/
|
||||
MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
|
||||
PacketPtr pkt, Tick time, bool requestBus)
|
||||
{
|
||||
|
@ -251,15 +262,25 @@ class BaseCache : public MemObject
|
|||
const unsigned blkSize;
|
||||
|
||||
/**
|
||||
* The latency of a hit in this device.
|
||||
* The latency of tag lookup of a cache. It occurs when there is
|
||||
* an access to the cache.
|
||||
*/
|
||||
const Cycles hitLatency;
|
||||
const Cycles lookupLatency;
|
||||
|
||||
/**
|
||||
* The latency of sending response to its upper level cache/core on a
|
||||
* linefill. In most contemporary processors, the return path on a cache
|
||||
* miss is much quicker than the hit latency. The responseLatency parameter
|
||||
* tries to capture this latency.
|
||||
* This is the forward latency of the cache. It occurs when there
|
||||
* is a cache miss and a request is forwarded downstream, in
|
||||
* particular an outbound miss.
|
||||
*/
|
||||
const Cycles forwardLatency;
|
||||
|
||||
/** The latency to fill a cache block */
|
||||
const Cycles fillLatency;
|
||||
|
||||
/**
|
||||
* The latency of sending response to its upper level cache/core on
|
||||
* a linefill. The responseLatency parameter captures this
|
||||
* latency.
|
||||
*/
|
||||
const Cycles responseLatency;
|
||||
|
||||
|
|
103
src/mem/cache/cache_impl.hh
vendored
103
src/mem/cache/cache_impl.hh
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010-2014 ARM Limited
|
||||
* Copyright (c) 2010-2015 ARM Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
@ -314,11 +314,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
|
|||
if (pkt->req->isUncacheable()) {
|
||||
uncacheableFlush(pkt);
|
||||
blk = NULL;
|
||||
lat = hitLatency;
|
||||
// lookupLatency is the latency in case the request is uncacheable.
|
||||
lat = lookupLatency;
|
||||
return false;
|
||||
}
|
||||
|
||||
int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
|
||||
// Here lat is the value passed as parameter to accessBlock() function
|
||||
// that can modify its value.
|
||||
blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat, id);
|
||||
|
||||
DPRINTF(Cache, "%s%s %x (%s) %s %s\n", pkt->cmdString(),
|
||||
|
@ -392,7 +395,6 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
|
|||
{
|
||||
DPRINTF(Cache, "%s for %s address %x size %d\n", __func__,
|
||||
pkt->cmdString(), pkt->getAddr(), pkt->getSize());
|
||||
Tick time = clockEdge(hitLatency);
|
||||
|
||||
assert(pkt->isResponse());
|
||||
|
||||
|
@ -418,7 +420,10 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
|
|||
delete rec;
|
||||
// @todo someone should pay for this
|
||||
pkt->firstWordDelay = pkt->lastWordDelay = 0;
|
||||
memSidePort->schedTimingSnoopResp(pkt, time);
|
||||
// forwardLatency is set here because there is a response from an
|
||||
// upper level cache.
|
||||
memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
|
||||
|
||||
}
|
||||
|
||||
template<class TagStore>
|
||||
|
@ -449,9 +454,6 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
delete pendingDelete[x];
|
||||
pendingDelete.clear();
|
||||
|
||||
// we charge hitLatency for doing just about anything here
|
||||
Tick time = clockEdge(hitLatency);
|
||||
|
||||
assert(pkt->isRequest());
|
||||
|
||||
// Just forward the packet if caches are disabled.
|
||||
|
@ -527,21 +529,34 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
// prefetching (cache loading) uncacheable data is nonsensical
|
||||
pkt->makeTimingResponse();
|
||||
std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
|
||||
cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency));
|
||||
// We use lookupLatency here because the request is uncacheable
|
||||
cpuSidePort->schedTimingResp(pkt, clockEdge(lookupLatency));
|
||||
return true;
|
||||
} else if (pkt->isWrite() && !pkt->isRead()) {
|
||||
allocateWriteBuffer(pkt, time, true);
|
||||
// We use forwardLatency here because there is an uncached
|
||||
// memory write, forwarded to WriteBuffer. It specifies the
|
||||
// latency to allocate an internal buffer and to schedule an
|
||||
// event to the queued port.
|
||||
allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
|
||||
} else {
|
||||
allocateUncachedReadBuffer(pkt, time, true);
|
||||
// We use forwardLatency here because there is an uncached
|
||||
// memory read, allocated to MSHR queue (it requires the same
|
||||
// time of forwarding to WriteBuffer, in our assumption). It
|
||||
// specifies the latency to allocate an internal buffer and to
|
||||
// schedule an event to the queued port.
|
||||
allocateUncachedReadBuffer(pkt, clockEdge(forwardLatency), true);
|
||||
}
|
||||
assert(pkt->needsResponse()); // else we should delete it here??
|
||||
return true;
|
||||
}
|
||||
|
||||
Cycles lat = hitLatency;
|
||||
// We use lookupLatency here because it is used to specify the latency
|
||||
// to access.
|
||||
Cycles lat = lookupLatency;
|
||||
BlkType *blk = NULL;
|
||||
PacketList writebacks;
|
||||
|
||||
// Note that lat is passed by reference here. The function access() calls
|
||||
// accessBlock() which can modify lat value.
|
||||
bool satisfied = access(pkt, blk, lat, writebacks);
|
||||
|
||||
// track time of availability of next prefetch, if any
|
||||
|
@ -565,6 +580,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
pkt->makeTimingResponse();
|
||||
// @todo: Make someone pay for this
|
||||
pkt->firstWordDelay = pkt->lastWordDelay = 0;
|
||||
|
||||
// In this case we are considering lat neglecting
|
||||
// responseLatency, modelling hit latency just as
|
||||
// lookupLatency. We pass lat by reference to access(),
|
||||
// which calls accessBlock() function. If it is a hit,
|
||||
// accessBlock() can modify lat to override the
|
||||
// lookupLatency value.
|
||||
cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
|
||||
} else {
|
||||
/// @todo nominally we should just delete the packet here,
|
||||
|
@ -638,7 +660,12 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
|
||||
mshr->threadNum = -1;
|
||||
}
|
||||
mshr->allocateTarget(pkt, time, order++);
|
||||
// We use forwardLatency here because it is the same
|
||||
// considering new targets. We have multiple requests for the
|
||||
// same address here. It specifies the latency to allocate an
|
||||
// internal buffer and to schedule an event to the queued
|
||||
// port.
|
||||
mshr->allocateTarget(pkt, clockEdge(forwardLatency), order++);
|
||||
if (mshr->getNumTargets() == numTarget) {
|
||||
noTargetMSHR = mshr;
|
||||
setBlocked(Blocked_NoTargets);
|
||||
|
@ -669,7 +696,11 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
// no-write-allocate or bypass accesses this will have to
|
||||
// be changed.
|
||||
if (pkt->cmd == MemCmd::Writeback) {
|
||||
allocateWriteBuffer(pkt, time, true);
|
||||
// We use forwardLatency here because this is a
|
||||
// writeback that is forwarded to WriteBuffer. It
|
||||
// specifies the latency to allocate an internal buffer and to
|
||||
// schedule an event to the queued port.
|
||||
allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
|
||||
} else {
|
||||
if (blk && blk->isValid()) {
|
||||
// If we have a write miss to a valid block, we
|
||||
|
@ -691,8 +722,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
assert(!blk->isWritable());
|
||||
blk->status &= ~BlkReadable;
|
||||
}
|
||||
|
||||
allocateMissBuffer(pkt, time, true);
|
||||
// Here we are using forwardLatency, modelling the latency of
|
||||
// a miss (outbound) just as forwardLatency, neglecting the
|
||||
// lookupLatency component. In this case this latency value
|
||||
// specifies the latency to allocate an internal buffer and to
|
||||
// schedule an event to the queued port, when a cacheable miss
|
||||
// is forwarded to MSHR queue.
|
||||
allocateMissBuffer(pkt, clockEdge(forwardLatency), true);
|
||||
}
|
||||
|
||||
if (prefetcher) {
|
||||
|
@ -702,14 +738,17 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Here we consider just forward latency.
|
||||
if (next_pf_time != MaxTick)
|
||||
requestMemSideBus(Request_PF, std::max(time, next_pf_time));
|
||||
|
||||
requestMemSideBus(Request_PF, std::max(clockEdge(forwardLatency),
|
||||
next_pf_time));
|
||||
// copy writebacks to write buffer
|
||||
while (!writebacks.empty()) {
|
||||
PacketPtr wbPkt = writebacks.front();
|
||||
allocateWriteBuffer(wbPkt, time, true);
|
||||
// We use forwardLatency here because we are copying writebacks
|
||||
// to write buffer. It specifies the latency to allocate an internal
|
||||
// buffer and to schedule an event to the queued port.
|
||||
allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
|
||||
writebacks.pop_front();
|
||||
}
|
||||
|
||||
|
@ -778,8 +817,8 @@ template<class TagStore>
|
|||
Tick
|
||||
Cache<TagStore>::recvAtomic(PacketPtr pkt)
|
||||
{
|
||||
Cycles lat = hitLatency;
|
||||
|
||||
// We are in atomic mode so we pay just for lookupLatency here.
|
||||
Cycles lat = lookupLatency;
|
||||
// @TODO: make this a parameter
|
||||
bool last_level_cache = false;
|
||||
|
||||
|
@ -996,7 +1035,6 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
|
|||
{
|
||||
assert(pkt->isResponse());
|
||||
|
||||
Tick time = clockEdge(hitLatency);
|
||||
MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
|
||||
bool is_error = pkt->isError();
|
||||
|
||||
|
@ -1221,13 +1259,18 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
|
|||
// copy writebacks to write buffer
|
||||
while (!writebacks.empty()) {
|
||||
PacketPtr wbPkt = writebacks.front();
|
||||
allocateWriteBuffer(wbPkt, time, true);
|
||||
allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
|
||||
writebacks.pop_front();
|
||||
}
|
||||
// if we used temp block, clear it out
|
||||
if (blk == tempBlock) {
|
||||
if (blk->isDirty()) {
|
||||
allocateWriteBuffer(writebackBlk(blk), time, true);
|
||||
// We use forwardLatency here because we are copying
|
||||
// writebacks to write buffer. It specifies the latency to
|
||||
// allocate an internal buffer and to schedule an event to the
|
||||
// queued port.
|
||||
allocateWriteBuffer(writebackBlk(blk), clockEdge(forwardLatency),
|
||||
true);
|
||||
}
|
||||
blk->invalidate();
|
||||
}
|
||||
|
@ -1467,8 +1510,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
|
|||
assert(pkt->hasData());
|
||||
std::memcpy(blk->data, pkt->getConstPtr<uint8_t>(), blkSize);
|
||||
}
|
||||
|
||||
blk->whenReady = clockEdge() + responseLatency * clockPeriod() +
|
||||
// We pay for fillLatency here.
|
||||
blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
|
||||
pkt->lastWordDelay;
|
||||
|
||||
return blk;
|
||||
|
@ -1521,7 +1564,8 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
|
|||
}
|
||||
DPRINTF(Cache, "%s created response: %s address %x size %d\n",
|
||||
__func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
|
||||
memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency));
|
||||
// We model a snoop just considering forwardLatency
|
||||
memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
|
||||
}
|
||||
|
||||
template<class TagStore>
|
||||
|
@ -1794,7 +1838,8 @@ Cache<TagStore>::recvAtomicSnoop(PacketPtr pkt)
|
|||
|
||||
BlkType *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
|
||||
handleSnoop(pkt, blk, false, false, false);
|
||||
return hitLatency * clockPeriod();
|
||||
// We consider forwardLatency here because a snoop occurs in atomic mode
|
||||
return forwardLatency * clockPeriod();
|
||||
}
|
||||
|
||||
|
||||
|
|
2
src/mem/cache/tags/base.cc
vendored
2
src/mem/cache/tags/base.cc
vendored
|
@ -55,7 +55,7 @@ using namespace std;
|
|||
|
||||
BaseTags::BaseTags(const Params *p)
|
||||
: ClockedObject(p), blkSize(p->block_size), size(p->size),
|
||||
hitLatency(p->hit_latency), cache(nullptr), warmupBound(0),
|
||||
accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
|
||||
warmedUp(false), numBlocks(0)
|
||||
{
|
||||
}
|
||||
|
|
5
src/mem/cache/tags/base.hh
vendored
5
src/mem/cache/tags/base.hh
vendored
|
@ -68,9 +68,8 @@ class BaseTags : public ClockedObject
|
|||
const unsigned blkSize;
|
||||
/** The size of the cache. */
|
||||
const unsigned size;
|
||||
/** The hit latency of the cache. */
|
||||
const Cycles hitLatency;
|
||||
|
||||
/** The access latency of the cache. */
|
||||
const Cycles accessLatency;
|
||||
/** Pointer to the parent cache. */
|
||||
BaseCache *cache;
|
||||
|
||||
|
|
3
src/mem/cache/tags/base_set_assoc.cc
vendored
3
src/mem/cache/tags/base_set_assoc.cc
vendored
|
@ -68,9 +68,6 @@ BaseSetAssoc::BaseSetAssoc(const Params *p)
|
|||
if (assoc <= 0) {
|
||||
fatal("associativity must be greater than zero");
|
||||
}
|
||||
if (hitLatency <= 0) {
|
||||
fatal("access latency must be greater than zero");
|
||||
}
|
||||
|
||||
blkMask = blkSize - 1;
|
||||
setShift = floorLog2(blkSize);
|
||||
|
|
12
src/mem/cache/tags/base_set_assoc.hh
vendored
12
src/mem/cache/tags/base_set_assoc.hh
vendored
|
@ -178,7 +178,7 @@ public:
|
|||
Addr tag = extractTag(addr);
|
||||
int set = extractSet(addr);
|
||||
BlkType *blk = sets[set].findBlk(tag, is_secure);
|
||||
lat = hitLatency;
|
||||
lat = accessLatency;;
|
||||
|
||||
// Access all tags in parallel, hence one in each way. The data side
|
||||
// either accesses all blocks in parallel, or one block sequentially on
|
||||
|
@ -195,7 +195,7 @@ public:
|
|||
if (blk != NULL) {
|
||||
if (blk->whenReady > curTick()
|
||||
&& cache->ticksToCycles(blk->whenReady - curTick())
|
||||
> hitLatency) {
|
||||
> accessLatency) {
|
||||
lat = cache->ticksToCycles(blk->whenReady - curTick());
|
||||
}
|
||||
blk->refCount += 1;
|
||||
|
@ -342,14 +342,6 @@ public:
|
|||
return ((tag << tagShift) | ((Addr)set << setShift));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the hit latency.
|
||||
* @return the hit latency.
|
||||
*/
|
||||
Cycles getHitLatency() const
|
||||
{
|
||||
return hitLatency;
|
||||
}
|
||||
/**
|
||||
*Iterate through all blocks and clear all locks
|
||||
*Needed to clear all lock tracking at once
|
||||
|
|
4
src/mem/cache/tags/fa_lru.cc
vendored
4
src/mem/cache/tags/fa_lru.cc
vendored
|
@ -60,8 +60,6 @@ FALRU::FALRU(const Params *p)
|
|||
if (!isPowerOf2(blkSize))
|
||||
fatal("cache block size (in bytes) `%d' must be a power of two",
|
||||
blkSize);
|
||||
if (!(hitLatency > 0))
|
||||
fatal("Access latency in cycles must be at least one cycle");
|
||||
if (!isPowerOf2(size))
|
||||
fatal("Cache Size must be power of 2 for now");
|
||||
|
||||
|
@ -202,7 +200,7 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
|
|||
*inCache = tmp_in_cache;
|
||||
}
|
||||
|
||||
lat = hitLatency;
|
||||
lat = accessLatency;
|
||||
//assert(check());
|
||||
return blk;
|
||||
}
|
||||
|
|
9
src/mem/cache/tags/fa_lru.hh
vendored
9
src/mem/cache/tags/fa_lru.hh
vendored
|
@ -209,15 +209,6 @@ public:
|
|||
|
||||
void insertBlock(PacketPtr pkt, BlkType *blk);
|
||||
|
||||
/**
|
||||
* Return the hit latency of this cache.
|
||||
* @return The hit latency.
|
||||
*/
|
||||
Cycles getHitLatency() const
|
||||
{
|
||||
return hitLatency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the block size of this cache.
|
||||
* @return The block size.
|
||||
|
|
Loading…
Reference in a new issue