ruby: Detect garnet network-level deadlock.
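This patch detects Garnet network-level deadlock by monitoring the network interfaces. Each network interface counts, per virtual network, how many consecutive times it fails to allocate a virtual channel for a message; the count is reset whenever an idle virtual channel is found. If the count exceeds a configurable threshold (--garnet-deadlock-threshold, default 50000), the simulation panics and reports a possible deadlock.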
This commit is contained in:
parent
cc1f5a4d16
commit
b9c7b8190c
4 changed files with 21 additions and 1 deletions
|
@ -70,6 +70,9 @@ def define_options(parser):
|
||||||
default=False,
|
default=False,
|
||||||
help="""enable network fault model:
|
help="""enable network fault model:
|
||||||
see src/mem/ruby/network/fault_model/""")
|
see src/mem/ruby/network/fault_model/""")
|
||||||
|
parser.add_option("--garnet-deadlock-threshold", action="store",
|
||||||
|
type="int", default=50000,
|
||||||
|
help="network-level deadlock threshold.")
|
||||||
|
|
||||||
|
|
||||||
def create_network(options, ruby):
|
def create_network(options, ruby):
|
||||||
|
@ -103,6 +106,7 @@ def init_network(options, network, InterfaceClass):
|
||||||
network.vcs_per_vnet = options.vcs_per_vnet
|
network.vcs_per_vnet = options.vcs_per_vnet
|
||||||
network.ni_flit_size = options.link_width_bits / 8
|
network.ni_flit_size = options.link_width_bits / 8
|
||||||
network.routing_algorithm = options.routing_algorithm
|
network.routing_algorithm = options.routing_algorithm
|
||||||
|
network.garnet_deadlock_threshold = options.garnet_deadlock_threshold
|
||||||
|
|
||||||
if options.network == "simple":
|
if options.network == "simple":
|
||||||
network.setup_buffers()
|
network.setup_buffers()
|
||||||
|
|
|
@ -46,6 +46,8 @@ class GarnetNetwork(RubyNetwork):
|
||||||
"0: Weight-based Table, 1: XY, 2: Custom");
|
"0: Weight-based Table, 1: XY, 2: Custom");
|
||||||
enable_fault_model = Param.Bool(False, "enable network fault model");
|
enable_fault_model = Param.Bool(False, "enable network fault model");
|
||||||
fault_model = Param.FaultModel(NULL, "network fault model");
|
fault_model = Param.FaultModel(NULL, "network fault model");
|
||||||
|
garnet_deadlock_threshold = Param.UInt32(50000,
|
||||||
|
"network-level deadlock threshold")
|
||||||
|
|
||||||
class GarnetNetworkInterface(ClockedObject):
|
class GarnetNetworkInterface(ClockedObject):
|
||||||
type = 'GarnetNetworkInterface'
|
type = 'GarnetNetworkInterface'
|
||||||
|
@ -57,6 +59,8 @@ class GarnetNetworkInterface(ClockedObject):
|
||||||
"virtual channels per virtual network")
|
"virtual channels per virtual network")
|
||||||
virt_nets = Param.UInt32(Parent.number_of_virtual_networks,
|
virt_nets = Param.UInt32(Parent.number_of_virtual_networks,
|
||||||
"number of virtual networks")
|
"number of virtual networks")
|
||||||
|
garnet_deadlock_threshold = Param.UInt32(Parent.garnet_deadlock_threshold,
|
||||||
|
"network-level deadlock threshold")
|
||||||
|
|
||||||
class GarnetRouter(BasicRouter):
|
class GarnetRouter(BasicRouter):
|
||||||
type = 'GarnetRouter'
|
type = 'GarnetRouter'
|
||||||
|
|
|
@ -50,7 +50,9 @@ using m5::stl_helpers::deletePointers;
|
||||||
NetworkInterface::NetworkInterface(const Params *p)
|
NetworkInterface::NetworkInterface(const Params *p)
|
||||||
: ClockedObject(p), Consumer(this), m_id(p->id),
|
: ClockedObject(p), Consumer(this), m_id(p->id),
|
||||||
m_virtual_networks(p->virt_nets), m_vc_per_vnet(p->vcs_per_vnet),
|
m_virtual_networks(p->virt_nets), m_vc_per_vnet(p->vcs_per_vnet),
|
||||||
m_num_vcs(m_vc_per_vnet * m_virtual_networks)
|
m_num_vcs(m_vc_per_vnet * m_virtual_networks),
|
||||||
|
m_deadlock_threshold(p->garnet_deadlock_threshold),
|
||||||
|
vc_busy_counter(m_virtual_networks, 0)
|
||||||
{
|
{
|
||||||
m_router_id = -1;
|
m_router_id = -1;
|
||||||
m_vc_round_robin = 0;
|
m_vc_round_robin = 0;
|
||||||
|
@ -314,9 +316,16 @@ NetworkInterface::calculateVC(int vnet)
|
||||||
|
|
||||||
if (m_out_vc_state[(vnet*m_vc_per_vnet) + delta]->isInState(
|
if (m_out_vc_state[(vnet*m_vc_per_vnet) + delta]->isInState(
|
||||||
IDLE_, curCycle())) {
|
IDLE_, curCycle())) {
|
||||||
|
vc_busy_counter[vnet] = 0;
|
||||||
return ((vnet*m_vc_per_vnet) + delta);
|
return ((vnet*m_vc_per_vnet) + delta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vc_busy_counter[vnet] += 1;
|
||||||
|
panic_if(vc_busy_counter[vnet] > m_deadlock_threshold,
|
||||||
|
"%s: Possible network deadlock in vnet: %d at time: %llu \n",
|
||||||
|
name(), vnet, curTick());
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,7 @@ class NetworkInterface : public ClockedObject, public Consumer
|
||||||
int m_vc_round_robin; // For round robin scheduling
|
int m_vc_round_robin; // For round robin scheduling
|
||||||
flitBuffer *outFlitQueue; // For modeling link contention
|
flitBuffer *outFlitQueue; // For modeling link contention
|
||||||
flitBuffer *outCreditQueue;
|
flitBuffer *outCreditQueue;
|
||||||
|
int m_deadlock_threshold;
|
||||||
|
|
||||||
NetworkLink *inNetLink;
|
NetworkLink *inNetLink;
|
||||||
NetworkLink *outNetLink;
|
NetworkLink *outNetLink;
|
||||||
|
@ -98,6 +99,8 @@ class NetworkInterface : public ClockedObject, public Consumer
|
||||||
std::vector<MessageBuffer *> inNode_ptr;
|
std::vector<MessageBuffer *> inNode_ptr;
|
||||||
// The Message buffers that provides messages to the protocol
|
// The Message buffers that provides messages to the protocol
|
||||||
std::vector<MessageBuffer *> outNode_ptr;
|
std::vector<MessageBuffer *> outNode_ptr;
|
||||||
|
// Per-vnet count of consecutive failed VC allocations; exceeding the deadlock threshold indicates a possible deadlock
|
||||||
|
std::vector<int> vc_busy_counter;
|
||||||
|
|
||||||
bool flitisizeMessage(MsgPtr msg_ptr, int vnet);
|
bool flitisizeMessage(MsgPtr msg_ptr, int vnet);
|
||||||
int calculateVC(int vnet);
|
int calculateVC(int vnet);
|
||||||
|
|
Loading…
Reference in a new issue