From 91f7b065a9b34ce0d3951001e30a9372d9b9dba9 Mon Sep 17 00:00:00 2001 From: Uri Wiener Date: Thu, 30 May 2013 12:53:58 -0400 Subject: [PATCH] mem: Add basic stats to the buses This patch adds a basic set of stats which are hard to impossible to implement using only communication monitors, and are needed for insight such as bus utilization, transactions through the bus etc. Stats added include throughput and transaction distribution, and also a two-dimensional vector capturing how many packets and how much data is exchanged between the masters and slaves connected to the bus. --- src/mem/bus.cc | 69 +++++++++++++++++++++++ src/mem/bus.hh | 30 ++++++++++ src/mem/coherent_bus.cc | 110 +++++++++++++++++++++++++++++++++---- src/mem/coherent_bus.hh | 5 ++ src/mem/noncoherent_bus.cc | 65 ++++++++++++++++++++-- src/mem/noncoherent_bus.hh | 5 ++ 6 files changed, 266 insertions(+), 18 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 41406e87a..8e74212e0 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -179,6 +179,9 @@ void BaseBus::Layer::occupyLayer(Tick until) assert(until != 0); bus.schedule(releaseEvent, until); + // account for the occupied ticks + occupancy += until - curTick(); + DPRINTF(BaseBus, "The bus is now busy from tick %d to %d\n", curTick(), until); } @@ -558,6 +561,52 @@ BaseBus::deviceBlockSize() const return blockSize; } +void +BaseBus::regStats() +{ + using namespace Stats; + + transDist + .init(MemCmd::NUM_MEM_CMDS) + .name(name() + ".trans_dist") + .desc("Transaction distribution") + .flags(nozero); + + // get the string representation of the commands + for (int i = 0; i < MemCmd::NUM_MEM_CMDS; i++) { + MemCmd cmd(i); + const std::string &cstr = cmd.toString(); + transDist.subname(i, cstr); + } + + pktCount + .init(slavePorts.size(), masterPorts.size()) + .name(name() + ".pkt_count") + .desc("Packet count per connected master and slave (bytes)") + .flags(total | nozero | nonan); + + totPktSize + .init(slavePorts.size(), 
masterPorts.size()) + .name(name() + ".tot_pkt_size") + .desc("Cumulative packet size per connected master and slave (bytes)") + .flags(total | nozero | nonan); + + // both the packet count and total size are two-dimensional + // vectors, indexed by slave port id and master port id, thus the + // neighbouring master and slave, they do not differentiate what + // came from the master and was forwarded to the slave (requests + // and snoop responses) and what came from the slave and was + // forwarded to the master (responses and snoop requests) + for (int i = 0; i < slavePorts.size(); i++) { + pktCount.subname(i, slavePorts[i]->getMasterPort().name()); + totPktSize.subname(i, slavePorts[i]->getMasterPort().name()); + for (int j = 0; j < masterPorts.size(); j++) { + pktCount.ysubname(j, masterPorts[j]->getSlavePort().name()); + totPktSize.ysubname(j, masterPorts[j]->getSlavePort().name()); + } + } +} + template unsigned int BaseBus::Layer::drain(DrainManager *dm) @@ -573,6 +622,26 @@ BaseBus::Layer::drain(DrainManager *dm) return 0; } +template +void +BaseBus::Layer::regStats() +{ + using namespace Stats; + + occupancy + .name(name() + ".occupancy") + .desc("Layer occupancy (ticks)") + .flags(nozero); + + utilization + .name(name() + ".utilization") + .desc("Layer utilization (%)") + .precision(1) + .flags(nozero); + + utilization = 100 * occupancy / simTicks; +} + /** * Bus layer template instantiations. 
Could be removed with _impl.hh * file, but since there are only two given options (MasterPort and diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 7492cf622..5e9023c89 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -57,6 +57,7 @@ #include "base/types.hh" #include "mem/mem_object.hh" #include "params/BaseBus.hh" +#include "sim/stats.hh" /** * The base bus contains the common elements of the non-coherent and @@ -179,6 +180,11 @@ class BaseBus : public MemObject */ void recvRetry(PortID port_id); + /** + * Register stats for the layer + */ + void regStats(); + private: /** The bus this layer is a part of. */ @@ -246,6 +252,14 @@ class BaseBus : public MemObject /** event used to schedule a release of the layer */ EventWrapper releaseEvent; + /** + * Stats for occupancy and utilization. These stats capture + * the time the bus spends in the busy state and are thus only + * relevant when the memory system is in timing mode. + */ + Stats::Scalar occupancy; + Stats::Formula utilization; + }; /** cycles of overhead per transaction */ @@ -381,6 +395,20 @@ class BaseBus : public MemObject virtual ~BaseBus(); + /** + * Stats for transaction distribution and data passing through the + * bus. The transaction distribution is globally counting + * different types of commands. The packet count and total packet + * size are two-dimensional vectors that are indexed by the bus + * slave port and master port id (thus the neighbouring master and + * neighbouring slave), summing up both directions (request and + * response). 
+ */ + Stats::Formula throughput; + Stats::Vector transDist; + Stats::Vector2d pktCount; + Stats::Vector2d totPktSize; + public: virtual void init(); @@ -393,6 +421,8 @@ class BaseBus : public MemObject virtual unsigned int drain(DrainManager *dm) = 0; + virtual void regStats(); + }; #endif //__MEM_BUS_HH__ diff --git a/src/mem/coherent_bus.cc b/src/mem/coherent_bus.cc index 5f7153d2f..1edd63b09 100644 --- a/src/mem/coherent_bus.cc +++ b/src/mem/coherent_bus.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -125,11 +125,11 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) bool is_express_snoop = pkt->isExpressSnoop(); // determine the destination based on the address - PortID dest_port_id = findPort(pkt->getAddr()); + PortID master_port_id = findPort(pkt->getAddr()); // test if the bus should be considered occupied for the current // port, and exclude express snoops from the check - if (!is_express_snoop && !reqLayer.tryTiming(src_port, dest_port_id)) { + if (!is_express_snoop && !reqLayer.tryTiming(src_port, master_port_id)) { DPRINTF(CoherentBus, "recvTimingReq: src %s %s 0x%x BUS BUSY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); return false; @@ -139,6 +139,11 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) src_port->name(), pkt->cmdString(), is_express_snoop, pkt->getAddr()); + // store size and command as they might be modified when + // forwarding the packet + unsigned int pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; + unsigned int pkt_cmd = pkt->cmdToIndex(); + // set the source port for routing of the response pkt->setSrc(slave_port_id); @@ -169,11 +174,12 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) } // since it is a normal request, attempt to send the packet - bool success = masterPorts[dest_port_id]->sendTimingReq(pkt); + bool success = masterPorts[master_port_id]->sendTimingReq(pkt); // if this is an express snoop, we are done at this point if (is_express_snoop) { assert(success); + snoopDataThroughBus += pkt_size; } else { // for normal requests, check if successful if (!success) { @@ -192,14 +198,22 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) src_port->name(), pkt->cmdString(), pkt->getAddr()); // update the bus state and schedule an idle event - reqLayer.failedTiming(src_port, dest_port_id, + reqLayer.failedTiming(src_port, master_port_id, clockEdge(Cycles(headerCycles))); } else { // update the bus state and schedule an idle event reqLayer.succeededTiming(packetFinishTime); + dataThroughBus += pkt_size; } } + // stats updates only consider packets that were successfully sent + if (success) { + pktCount[slave_port_id][master_port_id]++; + totPktSize[slave_port_id][master_port_id] += pkt_size; + transDist[pkt_cmd]++; + } + return success; } @@ -220,6 +234,11 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) DPRINTF(CoherentBus, "recvTimingResp: src %s %s 0x%x\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); + // store size and command as they might be modified when + // forwarding the packet + unsigned int pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; + unsigned int pkt_cmd = pkt->cmdToIndex(); + calcPacketTiming(pkt); Tick packetFinishTime = pkt->busLastWordDelay + curTick(); @@ -230,9 +249,11 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) // remove it as outstanding outstandingReq.erase(pkt->req); - // send the packet to the destination through one of our slave - // ports, as determined by the destination field - bool success M5_VAR_USED = slavePorts[pkt->getDest()]->sendTimingResp(pkt); + // determine the destination based on what is stored in the packet + PortID slave_port_id = pkt->getDest(); + + // send the packet through the destination slave port + bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt); // currently it is illegal to block responses... can lead to // deadlock @@ -240,6 +261,12 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) respLayer.succeededTiming(packetFinishTime); + // stats updates + dataThroughBus += pkt_size; + pktCount[slave_port_id][master_port_id]++; + totPktSize[slave_port_id][master_port_id] += pkt_size; + transDist[pkt_cmd]++; + return true; } @@ -250,6 +277,10 @@ CoherentBus::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) masterPorts[master_port_id]->name(), pkt->cmdString(), pkt->getAddr()); + // update stats here as we know the forwarding will succeed + transDist[pkt->cmdToIndex()]++; + snoopDataThroughBus += pkt->hasData() ? pkt->getSize() : 0; + // we should only see express snoops from caches assert(pkt->isExpressSnoop()); @@ -286,8 +317,13 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) DPRINTF(CoherentBus, "recvTimingSnoop: src %s %s 0x%x\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); + // store size and command as they might be modified when + // forwarding the packet + unsigned int pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; + unsigned int pkt_cmd = pkt->cmdToIndex(); + // get the destination from the packet - PortID dest = pkt->getDest(); + PortID dest_port_id = pkt->getDest(); // responses are never express snoops assert(!pkt->isExpressSnoop()); @@ -303,7 +339,10 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) // this is a snoop response to a snoop request we // forwarded, e.g. coming from the L1 and going to the L2 // this should be forwarded as a snoop response - bool success M5_VAR_USED = masterPorts[dest]->sendTimingSnoopResp(pkt); + bool success M5_VAR_USED = + masterPorts[dest_port_id]->sendTimingSnoopResp(pkt); + pktCount[slave_port_id][dest_port_id]++; + totPktSize[slave_port_id][dest_port_id] += pkt_size; assert(success); } else { // we got a snoop response on one of our slave ports, @@ -317,11 +356,12 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) // request, hence it should never go back to where the // snoop response came from, but instead to where the // original request came from - assert(slave_port_id != dest); + assert(slave_port_id != dest_port_id); // as a normal response, it should go back to a master // through one of our slave ports - bool success M5_VAR_USED = slavePorts[dest]->sendTimingResp(pkt); + bool success M5_VAR_USED = + slavePorts[dest_port_id]->sendTimingResp(pkt); // currently it is illegal to block responses... can lead // to deadlock @@ -330,6 +370,10 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) snoopRespLayer.succeededTiming(packetFinishTime); + // stats updates + transDist[pkt_cmd]++; + snoopDataThroughBus += pkt_size; + return true; } @@ -373,6 +417,9 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) slavePorts[slave_port_id]->name(), pkt->getAddr(), pkt->cmdString()); + // add the request data + dataThroughBus += pkt->hasData() ? 
pkt->getSize() : 0; + MemCmd snoop_response_cmd = MemCmd::InvalidCmd; Tick snoop_response_latency = 0; @@ -400,6 +447,10 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) response_latency = snoop_response_latency; } + // add the response data + if (pkt->isResponse()) + dataThroughBus += pkt->hasData() ? pkt->getSize() : 0; + // @todo: Not setting first-word time pkt->busLastWordDelay = response_latency; return response_latency; @@ -412,6 +463,9 @@ CoherentBus::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) masterPorts[master_port_id]->name(), pkt->getAddr(), pkt->cmdString()); + // add the request snoop data + snoopDataThroughBus += pkt->hasData() ? pkt->getSize() : 0; + // forward to all snoopers std::pair snoop_result = forwardAtomic(pkt, InvalidPortID); @@ -421,6 +475,10 @@ CoherentBus::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) if (snoop_response_cmd != MemCmd::InvalidCmd) pkt->cmd = snoop_response_cmd; + // add the response snoop data + if (pkt->isResponse()) + snoopDataThroughBus += pkt->hasData() ? 
pkt->getSize() : 0; + // @todo: Not setting first-word time pkt->busLastWordDelay = snoop_response_latency; return snoop_response_latency; @@ -542,6 +600,34 @@ CoherentBus::drain(DrainManager *dm) return reqLayer.drain(dm) + respLayer.drain(dm) + snoopRespLayer.drain(dm); } +void +CoherentBus::regStats() +{ + // register the stats of the base class and our three bus layers + BaseBus::regStats(); + reqLayer.regStats(); + respLayer.regStats(); + snoopRespLayer.regStats(); + + dataThroughBus + .name(name() + ".data_through_bus") + .desc("Total data (bytes)") + ; + + snoopDataThroughBus + .name(name() + ".snoop_data_through_bus") + .desc("Total snoop data (bytes)") + ; + + throughput + .name(name() + ".throughput") + .desc("Throughput (bytes/s)") + .precision(0) + ; + + throughput = (dataThroughBus + snoopDataThroughBus) / simSeconds; +} + CoherentBus * CoherentBusParams::create() { diff --git a/src/mem/coherent_bus.hh b/src/mem/coherent_bus.hh index 837cc23d8..eb8b41e6a 100644 --- a/src/mem/coherent_bus.hh +++ b/src/mem/coherent_bus.hh @@ -300,6 +300,9 @@ class CoherentBus : public BaseBus */ void forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id); + Stats::Scalar dataThroughBus; + Stats::Scalar snoopDataThroughBus; + public: virtual void init(); @@ -307,6 +310,8 @@ class CoherentBus : public BaseBus CoherentBus(const CoherentBusParams *p); unsigned int drain(DrainManager *dm); + + virtual void regStats(); }; #endif //__MEM_COHERENT_BUS_HH__ diff --git a/src/mem/noncoherent_bus.cc b/src/mem/noncoherent_bus.cc index f6c315297..5bf5cfd88 100644 --- a/src/mem/noncoherent_bus.cc +++ b/src/mem/noncoherent_bus.cc @@ -99,11 +99,11 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) assert(!pkt->isExpressSnoop()); // determine the destination based on the address - PortID dest_port_id = findPort(pkt->getAddr()); + PortID master_port_id = findPort(pkt->getAddr()); // test if the bus should be considered occupied for the current // port - if 
(!reqLayer.tryTiming(src_port, dest_port_id)) { + if (!reqLayer.tryTiming(src_port, master_port_id)) { DPRINTF(NoncoherentBus, "recvTimingReq: src %s %s 0x%x BUSY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); return false; @@ -112,6 +112,11 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) DPRINTF(NoncoherentBus, "recvTimingReq: src %s %s 0x%x\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); + // store size and command as they might be modified when + // forwarding the packet + unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; + unsigned int pkt_cmd = pkt->cmdToIndex(); + // set the source port for routing of the response pkt->setSrc(slave_port_id); @@ -119,7 +124,7 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // since it is a normal request, attempt to send the packet - bool success = masterPorts[dest_port_id]->sendTimingReq(pkt); + bool success = masterPorts[master_port_id]->sendTimingReq(pkt); if (!success) { // inhibited packets should never be forced to retry @@ -132,7 +137,7 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) pkt->busFirstWordDelay = pkt->busLastWordDelay = 0; // occupy until the header is sent - reqLayer.failedTiming(src_port, dest_port_id, + reqLayer.failedTiming(src_port, master_port_id, clockEdge(Cycles(headerCycles))); return false; @@ -140,6 +145,12 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) reqLayer.succeededTiming(packetFinishTime); + // stats updates + dataThroughBus += pkt_size; + pktCount[slave_port_id][master_port_id]++; + totPktSize[slave_port_id][master_port_id] += pkt_size; + transDist[pkt_cmd]++; + return true; } @@ -160,11 +171,18 @@ NoncoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) DPRINTF(NoncoherentBus, "recvTimingResp: src %s %s 0x%x\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); + // store size and command as they 
might be modified when + // forwarding the packet + unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; + unsigned int pkt_cmd = pkt->cmdToIndex(); + calcPacketTiming(pkt); Tick packetFinishTime = pkt->busLastWordDelay + curTick(); - // send the packet to the destination through one of our slave - // ports, as determined by the destination field + // determine the destination based on what is stored in the packet + PortID slave_port_id = pkt->getDest(); + + // send the packet through the destination slave port bool success M5_VAR_USED = slavePorts[pkt->getDest()]->sendTimingResp(pkt); // currently it is illegal to block responses... can lead to @@ -173,6 +191,12 @@ NoncoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) respLayer.succeededTiming(packetFinishTime); + // stats updates + dataThroughBus += pkt_size; + pktCount[slave_port_id][master_port_id]++; + totPktSize[slave_port_id][master_port_id] += pkt_size; + transDist[pkt_cmd]++; + return true; } @@ -192,12 +216,19 @@ NoncoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) slavePorts[slave_port_id]->name(), pkt->getAddr(), pkt->cmdString()); + // add the request data + dataThroughBus += pkt->hasData() ? pkt->getSize() : 0; + // determine the destination port PortID dest_id = findPort(pkt->getAddr()); // forward the request to the appropriate destination Tick response_latency = masterPorts[dest_id]->sendAtomic(pkt); + // add the response data + if (pkt->isResponse()) + dataThroughBus += pkt->hasData() ? 
pkt->getSize() : 0; + // @todo: Not setting first-word time pkt->busLastWordDelay = response_latency; return response_latency; @@ -233,3 +264,25 @@ NoncoherentBusParams::create() { return new NoncoherentBus(this); } + +void +NoncoherentBus::regStats() +{ + // register the stats of the base class and our two bus layers + BaseBus::regStats(); + reqLayer.regStats(); + respLayer.regStats(); + + dataThroughBus + .name(name() + ".data_through_bus") + .desc("Total data (bytes)") + ; + + throughput + .name(name() + ".throughput") + .desc("Throughput (bytes/s)") + .precision(0) + ; + + throughput = dataThroughBus / simSeconds; +} diff --git a/src/mem/noncoherent_bus.hh b/src/mem/noncoherent_bus.hh index 5d20a11b2..8fc2c40d5 100644 --- a/src/mem/noncoherent_bus.hh +++ b/src/mem/noncoherent_bus.hh @@ -209,6 +209,11 @@ class NoncoherentBus : public BaseBus unsigned int drain(DrainManager *dm); + /** + * Register stats for the bus and its request and response layers + */ + virtual void regStats(); + Stats::Scalar dataThroughBus; }; #endif //__MEM_NONCOHERENT_BUS_HH__