fc5bf6713f
Multi gem5 is an extension to gem5 to enable parallel simulation of a distributed system (e.g. simulation of a pool of machines connected by Ethernet links). A multi gem5 run consists of separate gem5 processes running in parallel (potentially on different hosts/slots on a cluster). Each gem5 process executes the simulation of a component of the simulated distributed system (e.g. a multi-core board with an Ethernet NIC). The patch implements the "distributed" Ethernet link device (dev/src/multi_etherlink.[hh,cc]). This device will send/receive (simulated) Ethernet packets to/from peer gem5 processes. The interface to talk to the peer gem5 processes is defined in dev/src/multi_iface.hh and in tcp_iface.hh. There is also a central message server process (util/multi/tcp_server.[hh,cc]) which acts like an Ethernet switch and transfers messages among the gem5 peers. A multi gem5 simulation can be kicked off by the util/multi/gem5-multi.sh wrapper script. Checkpoints are supported by multi-gem5. The checkpoint must be initiated by a single gem5 process. E.g., the gem5 process with rank 0 can take a checkpoint from the bootscript just before it invokes 'mpirun' to launch an MPI test. The message server process will notify all the other peer gem5 processes and make them take a checkpoint, too (after completing a global synchronisation to ensure that there are no inflight messages among gem5).
158 lines
5.2 KiB
C++
158 lines
5.2 KiB
C++
/*
|
|
* Copyright (c) 2015 ARM Limited
|
|
* All rights reserved
|
|
*
|
|
* The license below extends only to copyright in the software and shall
|
|
* not be construed as granting a license to any other intellectual
|
|
* property including but not limited to intellectual property relating
|
|
* to a hardware implementation of the functionality of the software
|
|
* licensed hereunder. You may use the software subject to the license
|
|
* terms below provided that you ensure that this notice is replicated
|
|
* unmodified and in its entirety in all distributions of the software,
|
|
* modified or unmodified, in source code or in binary form.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* Authors: Gabor Dozsa
|
|
*/
|
|
|
|
/* @file
|
|
* TCP stream socket based interface class implementation for multi gem5 runs.
|
|
*/
|
|
|
|
#include "dev/tcp_iface.hh"
|
|
|
|
#include <arpa/inet.h>
|
|
#include <netdb.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
|
|
#include <cerrno>
|
|
#include <cstring>
|
|
|
|
#include "base/types.hh"
|
|
#include "debug/MultiEthernet.hh"
|
|
|
|
// MSG_NOSIGNAL does not exist on OS X
|
|
#if defined(__APPLE__) || defined(__MACH__)
|
|
#ifndef MSG_NOSIGNAL
|
|
#define MSG_NOSIGNAL SO_NOSIGPIPE
|
|
#endif
|
|
#endif
|
|
|
|
using namespace std;
|
|
|
|
// Definition of the static registry of socket descriptors shared by all
// TCPIface objects: each constructor appends its connected socket here and
// syncRaw() sends the sync header over every registered socket.
vector<int> TCPIface::sockRegistry;
|
|
|
|
/**
 * Create the interface and connect it to the message server.
 *
 * Opens a TCP stream socket, resolves server_name:server_port (IPv4 only),
 * connects to the first address returned by the resolver, records the socket
 * in the static socket registry and finally sends this process' rank to the
 * server. Any failure along the way is fatal (panic).
 *
 * @param server_name host name (or address string) of the message server
 * @param server_port TCP port the message server listens on
 * @param multi_rank rank of this gem5 process; sent to the server right
 *                   after the connection is established
 * @param sync_start forwarded unchanged to the MultiIface base class
 * @param sync_repeat forwarded unchanged to the MultiIface base class
 * @param em forwarded unchanged to the MultiIface base class
 */
TCPIface::TCPIface(string server_name, unsigned server_port,
                   unsigned multi_rank, Tick sync_start, Tick sync_repeat,
                   EventManager *em) :
    MultiIface(multi_rank, sync_start, sync_repeat, em)
{
    struct addrinfo addr_hint, *addr_results;
    int ret;

    // getaddrinfo() takes the service (port) as a string
    string port_str = to_string(server_port);

    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    panic_if(sock < 0, "socket() failed: %s", strerror(errno));

    // restrict the lookup to IPv4 TCP stream endpoints
    memset(&addr_hint, 0, sizeof(addr_hint));
    addr_hint.ai_family = AF_INET;
    addr_hint.ai_socktype = SOCK_STREAM;
    addr_hint.ai_protocol = IPPROTO_TCP;

    ret = getaddrinfo(server_name.c_str(), port_str.c_str(),
                      &addr_hint, &addr_results);
    // getaddrinfo() reports failure with a non-zero EAI_* code (whose sign is
    // platform dependent) and does not use errno for most errors, so the
    // message has to come from gai_strerror().
    panic_if(ret != 0, "getaddrinfo() failed: %s", gai_strerror(ret));

    DPRINTF(MultiEthernet, "Connecting to %s:%u\n",
            server_name.c_str(), server_port);

    ret = ::connect(sock, addr_results->ai_addr, addr_results->ai_addrlen);
    panic_if(ret < 0, "connect() failed: %s", strerror(errno));

    freeaddrinfo(addr_results);
    // add our socket to the static registry
    sockRegistry.push_back(sock);
    // let the server know who we are
    sendTCP(sock, &multi_rank, sizeof(multi_rank));
}
|
|
|
|
/**
 * Tear down the connection to the message server.
 *
 * The socket is removed from the static registry before it is closed, so
 * that a later syncRaw() call (which sends on every registered socket)
 * cannot use a descriptor that is closed or has been reused.
 */
TCPIface::~TCPIface()
{
    // drop (the single entry of) our socket from the static registry
    for (auto it = sockRegistry.begin(); it != sockRegistry.end(); ++it) {
        if (*it == sock) {
            sockRegistry.erase(it);
            break;
        }
    }

    int M5_VAR_USED ret;

    ret = close(sock);
    assert(ret == 0);
}
|
|
|
|
void
|
|
TCPIface::sendTCP(int sock, void *buf, unsigned length)
|
|
{
|
|
ssize_t ret;
|
|
|
|
ret = ::send(sock, buf, length, MSG_NOSIGNAL);
|
|
panic_if(ret < 0, "send() failed: %s", strerror(errno));
|
|
panic_if(ret != length, "send() failed");
|
|
}
|
|
|
|
bool
|
|
TCPIface::recvTCP(int sock, void *buf, unsigned length)
|
|
{
|
|
ssize_t ret;
|
|
|
|
ret = ::recv(sock, buf, length, MSG_WAITALL );
|
|
if (ret < 0) {
|
|
if (errno == ECONNRESET || errno == EPIPE)
|
|
inform("recv(): %s", strerror(errno));
|
|
else if (ret < 0)
|
|
panic("recv() failed: %s", strerror(errno));
|
|
} else if (ret == 0) {
|
|
inform("recv(): Connection closed");
|
|
} else if (ret != length)
|
|
panic("recv() failed");
|
|
|
|
return (ret == length);
|
|
}
|
|
|
|
void
|
|
TCPIface::syncRaw(MultiHeaderPkt::MsgType sync_req, Tick sync_tick)
|
|
{
|
|
/*
|
|
* Barrier is simply implemented by point-to-point messages to the server
|
|
* for now. This method is called by only one TCPIface object.
|
|
* The server will send back an 'ack' message when it gets the
|
|
* sync request from all clients.
|
|
*/
|
|
MultiHeaderPkt::Header header_pkt;
|
|
header_pkt.msgType = sync_req;
|
|
header_pkt.sendTick = sync_tick;
|
|
|
|
for (auto s : sockRegistry)
|
|
sendTCP(s, (void *)&header_pkt, sizeof(header_pkt));
|
|
}
|
|
|