diff --git a/configs/example/memtest.py b/configs/example/memtest.py index a3298890b..97bf79dff 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -299,6 +299,7 @@ make_cache_level(cachespec, cache_proto, len(cachespec), None) # Connect the lowest level crossbar to the memory last_subsys = getattr(system, 'l%dsubsys0' % len(cachespec)) last_subsys.xbar.master = system.physmem.port +last_subsys.xbar.point_of_coherency = True root = Root(full_system = False, system = system) if options.atomic: diff --git a/src/mem/XBar.py b/src/mem/XBar.py index 8614519b3..674f9262e 100644 --- a/src/mem/XBar.py +++ b/src/mem/XBar.py @@ -100,6 +100,12 @@ class CoherentXBar(BaseXBar): # An optional snoop filter snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter") + # Determine how this crossbar handles packets where caches have + # already committed to responding, by establishing if the crossbar + # is the point of coherency or not. + point_of_coherency = Param.Bool(False, "Consider this crossbar the " \ + "point of coherency") + system = Param.System(Parent.any, "System that the crossbar belongs to.") class SnoopFilter(SimObject): @@ -147,6 +153,11 @@ class SystemXBar(CoherentXBar): response_latency = 2 snoop_response_latency = 4 + # This specialisation of the coherent crossbar is to be considered + # the point of coherency, as there are no (coherent) downstream + # caches. + point_of_coherency = True + # In addition to the system interconnect, we typically also have one # or more on-chip I/O crossbars. Note that at some point we might want # to also define an off-chip I/O crossbar such as PCIe. diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 226647fdc..8a209e8b7 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -154,14 +154,8 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt) DPRINTF(Bridge, "recvTimingReq: %s addr 0x%x\n", pkt->cmdString(), pkt->getAddr()); - // if a cache is responding, sink the packet without further - // action, also discard any packet that is not a read or a write - if (pkt->cacheResponding() || - !(pkt->isWrite() || pkt->isRead())) { - assert(!pkt->needsResponse()); - pendingDelete.reset(pkt); - return true; - } + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); // we should not get a new request after committing to retry the // current one, but unfortunately the CPU violates this rule, so @@ -352,6 +346,9 @@ Bridge::BridgeSlavePort::recvRespRetry() Tick Bridge::BridgeSlavePort::recvAtomic(PacketPtr pkt) { + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + return delay * bridge.clockPeriod() + masterPort.sendAtomic(pkt); } diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 189fd4cab..e9b909646 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -630,57 +630,55 @@ Cache::recvTimingReq(PacketPtr pkt) // flag) is not providing writable (it is in Owned rather than // the Modified state), we know that there may be other Shared // copies in the system; go out and invalidate them all - if (pkt->needsWritable() && !pkt->responderHadWritable()) { - // an upstream cache that had the line in Owned state - // (dirty, but not writable), is responding and thus - // transferring the dirty line from one branch of the - // cache hierarchy to another + assert(pkt->needsWritable() && !pkt->responderHadWritable()); - // send out an express snoop and invalidate all other - // copies (snooping a packet that needs writable is the - // same as an invalidation), thus turning the Owned line - // into a Modified line, note that we don't invalidate the - // block in the current cache or any other cache on the - // path to memory + // an upstream cache that had the line in Owned state + // (dirty, but not writable), is responding and thus + // transferring the dirty line from one branch of the + // cache hierarchy to another - // create a downstream express snoop with cleared packet - // flags, there is no need to allocate any data as the - // packet is merely used to co-ordinate state transitions - Packet *snoop_pkt = new Packet(pkt, true, false); + // send out an express snoop and invalidate all other + // copies (snooping a packet that needs writable is the + // same as an invalidation), thus turning the Owned line + // into a Modified line, note that we don't invalidate the + // block in the current cache or any other cache on the + // path to memory - // also reset the bus time that the original packet has - // not yet paid for - snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0; + // create a downstream express snoop with cleared packet + // flags, there is no need to allocate any data as the + // packet is merely used to co-ordinate state transitions + Packet *snoop_pkt = new Packet(pkt, true, false); - // make this an instantaneous express snoop, and let the - // other caches in the system know that the another cache - // is responding, because we have found the authorative - // copy (Modified or Owned) that will supply the right - // data - snoop_pkt->setExpressSnoop(); - snoop_pkt->setCacheResponding(); + // also reset the bus time that the original packet has + // not yet paid for + snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0; - // this express snoop travels towards the memory, and at - // every crossbar it is snooped upwards thus reaching - // every cache in the system - bool M5_VAR_USED success = memSidePort->sendTimingReq(snoop_pkt); - // express snoops always succeed - assert(success); + // make this an instantaneous express snoop, and let the + // other caches in the system know that the another cache + // is responding, because we have found the authorative + // copy (Modified or Owned) that will supply the right + // data + snoop_pkt->setExpressSnoop(); + snoop_pkt->setCacheResponding(); - // main memory will delete the snoop packet - } + // this express snoop travels towards the memory, and at + // every crossbar it is snooped upwards thus reaching + // every cache in the system + bool M5_VAR_USED success = memSidePort->sendTimingReq(snoop_pkt); + // express snoops always succeed + assert(success); + + // main memory will delete the snoop packet // queue for deletion, as opposed to immediate deletion, as // the sending cache is still relying on the packet pendingDelete.reset(pkt); - // no need to take any action in this particular cache as an - // upstream cache has already committed to responding, and - // either the packet does not need writable (and we can let - // the cache that set the cache responding flag pass on the - // line without any need for intervention), or if the packet - // needs writable it is provided, or we have already sent out - // any express snoops in the section above + // no need to take any further action in this particular cache + // as an upstram cache has already committed to responding, + // and we have already sent out any express snoops in the + // section above to ensure all other copies in the system are + // invalidated return true; } @@ -1028,9 +1026,8 @@ Cache::recvAtomic(PacketPtr pkt) // if a cache is responding, and it had the line in Owned // rather than Modified state, we need to invalidate any // copies that are not on the same path to memory - if (pkt->needsWritable() && !pkt->responderHadWritable()) { - lat += ticksToCycles(memSidePort->sendAtomic(pkt)); - } + assert(pkt->needsWritable() && !pkt->responderHadWritable()); + lat += ticksToCycles(memSidePort->sendAtomic(pkt)); return lat * clockPeriod(); } @@ -2493,11 +2490,8 @@ Cache::CpuSidePort::recvTimingReq(PacketPtr pkt) bool success = false; - // always let packets through if an upstream cache has committed - // to responding, even if blocked (we should technically look at - // the isExpressSnoop flag, but it is set by the cache itself, and - // consequently we have to rely on the cacheResponding flag) - if (pkt->cacheResponding()) { + // always let express snoop packets through if even if blocked + if (pkt->isExpressSnoop()) { // do not change the current retry state bool M5_VAR_USED bypass_success = cache->recvTimingReq(pkt); assert(bypass_success); diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 3731bea3f..22642e6f2 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -56,7 +56,8 @@ CoherentXBar::CoherentXBar(const CoherentXBarParams *p) : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter), - snoopResponseLatency(p->snoop_response_latency) + snoopResponseLatency(p->snoop_response_latency), + pointOfCoherency(p->point_of_coherency) { // create the ports based on the size of the master and slave // vector ports, and the presence of the default port, the ports @@ -219,32 +220,48 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) pkt->snoopDelay = 0; } - // forwardTiming snooped into peer caches of the sender, and if - // this is a clean evict or clean writeback, but the packet is - // found in a cache, do not forward it - if ((pkt->cmd == MemCmd::CleanEvict || - pkt->cmd == MemCmd::WritebackClean) && pkt->isBlockCached()) { - DPRINTF(CoherentXBar, "Clean evict/writeback %#llx still cached, " - "not forwarding\n", pkt->getAddr()); - - // update the layer state and schedule an idle event - reqLayers[master_port_id]->succeededTiming(packetFinishTime); - - // queue the packet for deletion - pendingDelete.reset(pkt); - - return true; - } + // set up a sensible starting point + bool success = true; // remember if the packet will generate a snoop response by // checking if a cache set the cacheResponding flag during the // snooping above const bool expect_snoop_resp = !cache_responding && pkt->cacheResponding(); - const bool expect_response = pkt->needsResponse() && - !pkt->cacheResponding(); + bool expect_response = pkt->needsResponse() && !pkt->cacheResponding(); - // since it is a normal request, attempt to send the packet - bool success = masterPorts[master_port_id]->sendTimingReq(pkt); + const bool sink_packet = sinkPacket(pkt); + + // in certain cases the crossbar is responsible for responding + bool respond_directly = false; + + if (sink_packet) { + DPRINTF(CoherentXBar, "Not forwarding %s to %#llx\n", + pkt->cmdString(), pkt->getAddr()); + } else { + // determine if we are forwarding the packet, or responding to + // it + if (!pointOfCoherency || pkt->isRead() || pkt->isWrite()) { + // if we are passing on, rather than sinking, a packet to + // which an upstream cache has committed to responding, + // the line was needs writable, and the responding only + // had an Owned copy, so we need to immidiately let the + // downstream caches know, bypass any flow control + if (pkt->cacheResponding()) { + pkt->setExpressSnoop(); + } + + // since it is a normal request, attempt to send the packet + success = masterPorts[master_port_id]->sendTimingReq(pkt); + } else { + // no need to forward, turn this packet around and respond + // directly + assert(pkt->needsResponse()); + + respond_directly = true; + assert(!expect_snoop_resp); + expect_response = false; + } + } if (snoopFilter && !system->bypassCaches()) { // Let the snoop filter know about the success of the send operation @@ -303,6 +320,27 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) snoops++; } + if (sink_packet) + // queue the packet for deletion + pendingDelete.reset(pkt); + + if (respond_directly) { + assert(pkt->needsResponse()); + assert(success); + + pkt->makeResponse(); + + if (snoopFilter && !system->bypassCaches()) { + // let the snoop filter inspect the response and update its state + snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]); + } + + Tick response_time = clockEdge() + pkt->headerDelay; + pkt->headerDelay = 0; + + slavePorts[slave_port_id]->schedTimingResp(pkt, response_time); + } + return success; } @@ -633,27 +671,35 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) snoop_response_latency += snoop_result.second; } - // forwardAtomic snooped into peer caches of the sender, and if - // this is a clean evict, but the packet is found in a cache, do - // not forward it - if ((pkt->cmd == MemCmd::CleanEvict || - pkt->cmd == MemCmd::WritebackClean) && pkt->isBlockCached()) { - DPRINTF(CoherentXBar, "Clean evict/writeback %#llx still cached, " - "not forwarding\n", pkt->getAddr()); - return 0; - } + // set up a sensible default value + Tick response_latency = 0; + + const bool sink_packet = sinkPacket(pkt); // even if we had a snoop response, we must continue and also // perform the actual request at the destination PortID master_port_id = findPort(pkt->getAddr()); + if (sink_packet) { + DPRINTF(CoherentXBar, "Not forwarding %s to %#llx\n", + pkt->cmdString(), pkt->getAddr()); + } else { + if (!pointOfCoherency || pkt->isRead() || pkt->isWrite()) { + // forward the request to the appropriate destination + response_latency = masterPorts[master_port_id]->sendAtomic(pkt); + } else { + // if it does not need a response we sink the packet above + assert(pkt->needsResponse()); + + pkt->makeResponse(); + } + } + // stats updates for the request pktCount[slave_port_id][master_port_id]++; pktSize[slave_port_id][master_port_id] += pkt_size; transDist[pkt_cmd]++; - // forward the request to the appropriate destination - Tick response_latency = masterPorts[master_port_id]->sendAtomic(pkt); // if lower levels have replied, tell the snoop filter if (!system->bypassCaches() && snoopFilter && pkt->isResponse()) { @@ -877,6 +923,30 @@ CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id) } } +bool +CoherentXBar::sinkPacket(const PacketPtr pkt) const +{ + // we can sink the packet if: + // 1) the crossbar is the point of coherency, and a cache is + // responding after being snooped + // 2) the crossbar is the point of coherency, and the packet is a + // coherency packet (not a read or a write) that does not + // require a response + // 3) this is a clean evict or clean writeback, but the packet is + // found in a cache above this crossbar + // 4) a cache is responding after being snooped, and the packet + // either does not need the block to be writable, or the cache + // that has promised to respond (setting the cache responding + // flag) is providing writable and thus had a Modified block, + // and no further action is needed + return (pointOfCoherency && pkt->cacheResponding()) || + (pointOfCoherency && !(pkt->isRead() || pkt->isWrite()) && + !pkt->needsResponse()) || + (pkt->isCleanEviction() && pkt->isBlockCached()) || + (pkt->cacheResponding() && + (!pkt->needsWritable() || pkt->responderHadWritable())); +} + void CoherentXBar::regStats() { diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh index 24d00628b..8c55b59da 100644 --- a/src/mem/coherent_xbar.hh +++ b/src/mem/coherent_xbar.hh @@ -273,6 +273,9 @@ class CoherentXBar : public BaseXBar /** Cycles of snoop response latency.*/ const Cycles snoopResponseLatency; + /** Is this crossbar the point of coherency? **/ + const bool pointOfCoherency; + /** * Upstream caches need this packet until true is returned, so * hold it for deletion until a subsequent call @@ -384,6 +387,12 @@ class CoherentXBar : public BaseXBar */ void forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id); + /** + * Determine if the crossbar should sink the packet, as opposed to + * forwarding it, or responding. + */ + bool sinkPacket(const PacketPtr pkt) const; + Stats::Scalar snoops; Stats::Distribution snoopFanout; diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index c7ad3b448..55e1cf805 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -273,11 +273,14 @@ DRAMCtrl::recvAtomic(PacketPtr pkt) { DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr()); + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + // do the actual memory access and turn the packet into a response access(pkt); Tick latency = 0; - if (!pkt->cacheResponding() && pkt->hasData()) { + if (pkt->hasData()) { // this value is not supposed to be accurate, just enough to // keep things going, mimic a closed page latency = tRP + tRCD + tCL; @@ -590,11 +593,11 @@ DRAMCtrl::recvTimingReq(PacketPtr pkt) DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n", pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - // if a cache is responding, sink the packet without further action - if (pkt->cacheResponding()) { - pendingDelete.reset(pkt); - return true; - } + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); // Calc avg gap between requests if (prevArrival != 0) { @@ -625,7 +628,8 @@ DRAMCtrl::recvTimingReq(PacketPtr pkt) readReqs++; bytesReadSys += size; } - } else if (pkt->isWrite()) { + } else { + assert(pkt->isWrite()); assert(size != 0); if (writeQueueFull(dram_pkt_count)) { DPRINTF(DRAM, "Write queue full, not accepting\n"); @@ -638,10 +642,6 @@ DRAMCtrl::recvTimingReq(PacketPtr pkt) writeReqs++; bytesWrittenSys += size; } - } else { - DPRINTF(DRAM,"Neither read nor write, ignore timing\n"); - neitherReadNorWrite++; - accessAndRespond(pkt, 1); } return true; diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc index 8760c2bef..bb44b8c85 100644 --- a/src/mem/simple_mem.cc +++ b/src/mem/simple_mem.cc @@ -72,8 +72,11 @@ SimpleMemory::init() Tick SimpleMemory::recvAtomic(PacketPtr pkt) { + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + access(pkt); - return pkt->cacheResponding() ? 0 : getLatency(); + return getLatency(); } void @@ -97,11 +100,12 @@ SimpleMemory::recvFunctional(PacketPtr pkt) bool SimpleMemory::recvTimingReq(PacketPtr pkt) { - // if a cache is responding, sink the packet without further action - if (pkt->cacheResponding()) { - pendingDelete.reset(pkt); - return true; - } + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller, " + "saw %s to %#llx\n", pkt->cmdString(), pkt->getAddr()); // we should not get a new request after committing to retry the // current one, but unfortunately the CPU violates this rule, so @@ -127,21 +131,16 @@ SimpleMemory::recvTimingReq(PacketPtr pkt) // rather than long term as it is the short term data rate that is // limited for any real memory - // only look at reads and writes when determining if we are busy, - // and for how long, as it is not clear what to regulate for the - // other types of commands - if (pkt->isRead() || pkt->isWrite()) { - // calculate an appropriate tick to release to not exceed - // the bandwidth limit - Tick duration = pkt->getSize() * bandwidth; + // calculate an appropriate tick to release to not exceed + // the bandwidth limit + Tick duration = pkt->getSize() * bandwidth; - // only consider ourselves busy if there is any need to wait - // to avoid extra events being scheduled for (infinitely) fast - // memories - if (duration != 0) { - schedule(releaseEvent, curTick() + duration); - isBusy = true; - } + // only consider ourselves busy if there is any need to wait + // to avoid extra events being scheduled for (infinitely) fast + // memories + if (duration != 0) { + schedule(releaseEvent, curTick() + duration); + isBusy = true; } // go ahead and deal with the packet and put the response in the