From b9c7b8190c27ad161689934fa780859860cfd74f Mon Sep 17 00:00:00 2001 From: Jieming Yin Date: Thu, 15 Dec 2016 16:59:17 -0500 Subject: [PATCH] ruby: Detect garnet network-level deadlock. This patch detects garnet network deadlock by monitoring network interfaces. If a network interface continuously fails to allocate virtual channels for a message, a possible deadlock is detected. --- configs/network/Network.py | 4 ++++ src/mem/ruby/network/garnet2.0/GarnetNetwork.py | 4 ++++ src/mem/ruby/network/garnet2.0/NetworkInterface.cc | 11 ++++++++++- src/mem/ruby/network/garnet2.0/NetworkInterface.hh | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/configs/network/Network.py b/configs/network/Network.py index 3c15a4f79..1fbb38167 100644 --- a/configs/network/Network.py +++ b/configs/network/Network.py @@ -70,6 +70,9 @@ def define_options(parser): default=False, help="""enable network fault model: see src/mem/ruby/network/fault_model/""") + parser.add_option("--garnet-deadlock-threshold", action="store", + type="int", default=50000, + help="network-level deadlock threshold.") def create_network(options, ruby): @@ -103,6 +106,7 @@ def init_network(options, network, InterfaceClass): network.vcs_per_vnet = options.vcs_per_vnet network.ni_flit_size = options.link_width_bits / 8 network.routing_algorithm = options.routing_algorithm + network.garnet_deadlock_threshold = options.garnet_deadlock_threshold if options.network == "simple": network.setup_buffers() diff --git a/src/mem/ruby/network/garnet2.0/GarnetNetwork.py b/src/mem/ruby/network/garnet2.0/GarnetNetwork.py index 704532782..00213d60f 100644 --- a/src/mem/ruby/network/garnet2.0/GarnetNetwork.py +++ b/src/mem/ruby/network/garnet2.0/GarnetNetwork.py @@ -46,6 +46,8 @@ class GarnetNetwork(RubyNetwork): "0: Weight-based Table, 1: XY, 2: Custom"); enable_fault_model = Param.Bool(False, "enable network fault model"); fault_model = Param.FaultModel(NULL, "network fault model"); + garnet_deadlock_threshold = 
Param.UInt32(50000, + "network-level deadlock threshold") class GarnetNetworkInterface(ClockedObject): type = 'GarnetNetworkInterface' @@ -57,6 +59,8 @@ class GarnetNetworkInterface(ClockedObject): "virtual channels per virtual network") virt_nets = Param.UInt32(Parent.number_of_virtual_networks, "number of virtual networks") + garnet_deadlock_threshold = Param.UInt32(Parent.garnet_deadlock_threshold, + "network-level deadlock threshold") class GarnetRouter(BasicRouter): type = 'GarnetRouter' diff --git a/src/mem/ruby/network/garnet2.0/NetworkInterface.cc b/src/mem/ruby/network/garnet2.0/NetworkInterface.cc index 6bdaf39af..0ac55d260 100644 --- a/src/mem/ruby/network/garnet2.0/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet2.0/NetworkInterface.cc @@ -50,7 +50,9 @@ using m5::stl_helpers::deletePointers; NetworkInterface::NetworkInterface(const Params *p) : ClockedObject(p), Consumer(this), m_id(p->id), m_virtual_networks(p->virt_nets), m_vc_per_vnet(p->vcs_per_vnet), - m_num_vcs(m_vc_per_vnet * m_virtual_networks) + m_num_vcs(m_vc_per_vnet * m_virtual_networks), + m_deadlock_threshold(p->garnet_deadlock_threshold), + vc_busy_counter(m_virtual_networks, 0) { m_router_id = -1; m_vc_round_robin = 0; @@ -314,9 +316,16 @@ NetworkInterface::calculateVC(int vnet) if (m_out_vc_state[(vnet*m_vc_per_vnet) + delta]->isInState( IDLE_, curCycle())) { + vc_busy_counter[vnet] = 0; return ((vnet*m_vc_per_vnet) + delta); } } + + vc_busy_counter[vnet] += 1; + panic_if(vc_busy_counter[vnet] > m_deadlock_threshold, + "%s: Possible network deadlock in vnet: %d at time: %llu \n", + name(), vnet, curTick()); + return -1; } diff --git a/src/mem/ruby/network/garnet2.0/NetworkInterface.hh b/src/mem/ruby/network/garnet2.0/NetworkInterface.hh index 85e0145af..f1d1fd505 100644 --- a/src/mem/ruby/network/garnet2.0/NetworkInterface.hh +++ b/src/mem/ruby/network/garnet2.0/NetworkInterface.hh @@ -83,6 +83,7 @@ class NetworkInterface : public ClockedObject, public Consumer int 
m_vc_round_robin; // For round robin scheduling flitBuffer *outFlitQueue; // For modeling link contention flitBuffer *outCreditQueue; + int m_deadlock_threshold; NetworkLink *inNetLink; NetworkLink *outNetLink; @@ -98,6 +99,8 @@ class NetworkInterface : public ClockedObject, public Consumer std::vector<MessageBuffer *> inNode_ptr; // The Message buffers that provides messages to the protocol std::vector<MessageBuffer *> outNode_ptr; + // When a vc stays busy for a long time, it indicates a deadlock + std::vector<int> vc_busy_counter; bool flitisizeMessage(MsgPtr msg_ptr, int vnet); int calculateVC(int vnet);