mem: Make the coherent crossbar account for timing snoops
This patch introduces the concept of a snoop latency. Given the requirement to snoop and forward packets in zero time (due to the coherency mechanism), the latency is accounted for later. On a snoop, we establish the latency, and later add it to the header delay of the packet. To allow multiple caches to contribute to the snoop latency, we use a separate variable in the packet, and then take the maximum before adding it to the header delay.
This commit is contained in:
parent
3bd78a141e
commit
462c288a75
36
src/mem/cache/cache.cc
vendored
36
src/mem/cache/cache.cc
vendored
|
@ -1730,7 +1730,7 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
|
|||
memSidePort->schedTimingSnoopResp(pkt, forward_time, true);
|
||||
}
|
||||
|
||||
void
|
||||
uint32_t
|
||||
Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
||||
bool is_deferred, bool pending_inval)
|
||||
{
|
||||
|
@ -1748,6 +1748,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
|||
bool invalidate = pkt->isInvalidate();
|
||||
bool M5_VAR_USED needs_exclusive = pkt->needsExclusive();
|
||||
|
||||
uint32_t snoop_delay = 0;
|
||||
|
||||
if (forwardSnoops) {
|
||||
// first propagate snoop upward to see if anyone above us wants to
|
||||
// handle it. save & restore packet src since it will get
|
||||
|
@ -1765,6 +1767,12 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
|||
// time
|
||||
snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
|
||||
cpuSidePort->sendTimingSnoopReq(&snoopPkt);
|
||||
|
||||
// add the header delay (including crossbar and snoop
|
||||
// delays) of the upward snoop to the snoop delay for this
|
||||
// cache
|
||||
snoop_delay += snoopPkt.headerDelay;
|
||||
|
||||
if (snoopPkt.memInhibitAsserted()) {
|
||||
// cache-to-cache response from some upper cache
|
||||
assert(!alreadyResponded);
|
||||
|
@ -1796,7 +1804,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
|||
if (!blk || !blk->isValid()) {
|
||||
DPRINTF(Cache, "%s snoop miss for %s addr %#llx size %d\n",
|
||||
__func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
|
||||
return;
|
||||
return snoop_delay;
|
||||
} else {
|
||||
DPRINTF(Cache, "%s snoop hit for %s for addr %#llx size %d, "
|
||||
"old state is %s\n", __func__, pkt->cmdString(),
|
||||
|
@ -1824,7 +1832,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
|||
DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from"
|
||||
" lower cache\n", pkt->getAddr(), pkt->cmdString());
|
||||
pkt->setBlockCached();
|
||||
return;
|
||||
return snoop_delay;
|
||||
}
|
||||
|
||||
if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) {
|
||||
|
@ -1884,6 +1892,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
|
|||
}
|
||||
|
||||
DPRINTF(Cache, "new state is %s\n", blk->print());
|
||||
|
||||
return snoop_delay;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1907,6 +1917,14 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
|
|||
Addr blk_addr = blockAlign(pkt->getAddr());
|
||||
MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
|
||||
|
||||
// Update the latency cost of the snoop so that the crossbar can
|
||||
// account for it. Do not overwrite what other neighbouring caches
|
||||
// have already done, rather take the maximum. The update is
|
||||
// tentative, for cases where we return before an upward snoop
|
||||
// happens below.
|
||||
pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay,
|
||||
lookupLatency * clockPeriod());
|
||||
|
||||
// Inform request(Prefetch, CleanEvict or Writeback) from below of
|
||||
// MSHR hit, set setBlockCached.
|
||||
if (mshr && pkt->mustCheckAbove()) {
|
||||
|
@ -2004,7 +2022,12 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
|
|||
// We could be more selective and return here if the
|
||||
// request is non-exclusive or if the writeback is
|
||||
// exclusive.
|
||||
handleSnoop(pkt, blk, true, false, false);
|
||||
uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false);
|
||||
|
||||
// Override what we did when we first saw the snoop, as we now
|
||||
// also have the cost of the upwards snoops to account for
|
||||
pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay, snoop_delay +
|
||||
lookupLatency * clockPeriod());
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -2030,9 +2053,8 @@ Cache::recvAtomicSnoop(PacketPtr pkt)
|
|||
}
|
||||
|
||||
CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
|
||||
handleSnoop(pkt, blk, false, false, false);
|
||||
// We consider forwardLatency here because a snoop occurs in atomic mode
|
||||
return forwardLatency * clockPeriod();
|
||||
uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false);
|
||||
return snoop_delay + lookupLatency * clockPeriod();
|
||||
}
|
||||
|
||||
|
||||
|
|
17
src/mem/cache/cache.hh
vendored
17
src/mem/cache/cache.hh
vendored
|
@ -300,12 +300,19 @@ class Cache : public BaseCache
|
|||
bool already_copied, bool pending_inval);
|
||||
|
||||
/**
|
||||
* Sets the blk to the new state.
|
||||
* @param blk The cache block being snooped.
|
||||
* @param new_state The new coherence state for the block.
|
||||
* Perform an upward snoop if needed, and update the block state
|
||||
* (possibly invalidating the block). Also create a response if required.
|
||||
*
|
||||
* @param pkt Snoop packet
|
||||
* @param blk Cache block being snooped
|
||||
* @param is_timing Timing or atomic for the response
|
||||
* @param is_deferred Is this a deferred snoop or not?
|
||||
* @param pending_inval Do we have a pending invalidation?
|
||||
*
|
||||
* @return The snoop delay incurred by the upwards snoop
|
||||
*/
|
||||
void handleSnoop(PacketPtr ptk, CacheBlk *blk,
|
||||
bool is_timing, bool is_deferred, bool pending_inval);
|
||||
uint32_t handleSnoop(PacketPtr pkt, CacheBlk *blk,
|
||||
bool is_timing, bool is_deferred, bool pending_inval);
|
||||
|
||||
/**
|
||||
* Create a writeback request for the given block.
|
||||
|
|
|
@ -187,6 +187,8 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
|||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
if (!system->bypassCaches()) {
|
||||
assert(pkt->snoopDelay == 0);
|
||||
|
||||
// the packet is a memory-mapped request and should be
|
||||
// broadcasted to our snoopers but the source
|
||||
if (snoopFilter) {
|
||||
|
@ -204,6 +206,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
|||
} else {
|
||||
forwardTiming(pkt, slave_port_id);
|
||||
}
|
||||
|
||||
// add the snoop delay to our header delay, and then reset it
|
||||
pkt->headerDelay += pkt->snoopDelay;
|
||||
pkt->snoopDelay = 0;
|
||||
}
|
||||
|
||||
// forwardTiming snooped into peer caches of the sender, and if
|
||||
|
@ -377,9 +383,15 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id)
|
|||
// we should only see express snoops from caches
|
||||
assert(pkt->isExpressSnoop());
|
||||
|
||||
// set the packet header and payload delay, for now use forward latency
|
||||
// @todo Assess the choice of latency further
|
||||
calcPacketTiming(pkt, forwardLatency * clockPeriod());
|
||||
|
||||
// remeber if the packet is inhibited so we can see if it changes
|
||||
const bool is_inhibited = pkt->memInhibitAsserted();
|
||||
|
||||
assert(pkt->snoopDelay == 0);
|
||||
|
||||
if (snoopFilter) {
|
||||
// let the Snoop Filter work its magic and guide probing
|
||||
auto sf_res = snoopFilter->lookupSnoop(pkt);
|
||||
|
@ -398,6 +410,10 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id)
|
|||
forwardTiming(pkt, InvalidPortID);
|
||||
}
|
||||
|
||||
// add the snoop delay to our header delay, and then reset it
|
||||
pkt->headerDelay += pkt->snoopDelay;
|
||||
pkt->snoopDelay = 0;
|
||||
|
||||
// if we can expect a response, remember how to route it
|
||||
if (!is_inhibited && pkt->memInhibitAsserted()) {
|
||||
assert(routeTo.find(pkt->req) == routeTo.end());
|
||||
|
|
|
@ -323,6 +323,14 @@ class Packet : public Printable
|
|||
*/
|
||||
uint32_t headerDelay;
|
||||
|
||||
/**
|
||||
* Keep track of the extra delay incurred by snooping upwards
|
||||
* before sending a request down the memory system. This is used
|
||||
* by the coherent crossbar to account for the additional request
|
||||
* delay.
|
||||
*/
|
||||
uint32_t snoopDelay;
|
||||
|
||||
/**
|
||||
* The extra pipelining delay from seeing the packet until the end of
|
||||
* payload is transmitted by the component that provided it (if
|
||||
|
@ -582,7 +590,7 @@ class Packet : public Printable
|
|||
*/
|
||||
Packet(const RequestPtr _req, MemCmd _cmd)
|
||||
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
|
||||
size(0), headerDelay(0), payloadDelay(0),
|
||||
size(0), headerDelay(0), snoopDelay(0), payloadDelay(0),
|
||||
senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
|
@ -603,7 +611,7 @@ class Packet : public Printable
|
|||
*/
|
||||
Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
|
||||
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
|
||||
headerDelay(0), payloadDelay(0),
|
||||
headerDelay(0), snoopDelay(0), payloadDelay(0),
|
||||
senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
|
@ -628,6 +636,7 @@ class Packet : public Printable
|
|||
addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
|
||||
bytesValid(pkt->bytesValid),
|
||||
headerDelay(pkt->headerDelay),
|
||||
snoopDelay(0),
|
||||
payloadDelay(pkt->payloadDelay),
|
||||
senderState(pkt->senderState)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue