minix/servers/pfs/uds.c

1531 lines
35 KiB
C
Raw Normal View History

/*
* Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
* This code handles ioctl(2) commands to implement the socket API.
* Some helper functions are also present.
*
* The entry points into this file are...
*
* uds_init: initialize the descriptor table.
* do_accept: handles the accept(2) syscall.
* do_connect: handles the connect(2) syscall.
* do_listen: handles the listen(2) syscall.
* do_socket: handles the socket(2) syscall.
* do_bind: handles the bind(2) syscall.
* do_getsockname: handles the getsockname(2) syscall.
* do_getpeername: handles the getpeername(2) syscall.
* do_shutdown: handles the shutdown(2) syscall.
* do_socketpair: handles the socketpair(2) syscall.
* do_getsockopt_sotype: handles the getsockopt(2) syscall.
* do_getsockopt_peercred: handles the getsockopt(2) syscall.
* do_getsockopt_sndbuf: handles the getsockopt(2) syscall.
* do_setsockopt_sndbuf: handles the setsockopt(2) syscall.
* do_getsockopt_rcvbuf: handles the getsockopt(2) syscall.
* do_setsockopt_rcvbuf: handles the setsockopt(2) syscall.
* do_sendto: handles the sendto(2) syscall.
* do_recvfrom: handles the recvfrom(2) syscall.
* do_sendmsg: handles the sendmsg(2) syscall.
* do_recvmsg: handles the recvmsg(2) syscall.
* perform_connection: performs the connection of two descriptors.
* clear_fds: calls put_filp for undelivered FDs.
*
* Also see...
*
* table.c, dev_uds.c, uds.h
*/
#define DEBUG 0
#include "inc.h"
#include "const.h"
#include "glo.h"
#include "uds.h"
/* File Descriptor Table */
uds_fd_t uds_fd_table[NR_FDS];
/* initialize the descriptor table */
PUBLIC void uds_init(void)
{
/*
* Setting everything to NULL implicitly sets the
* state to UDS_FREE.
*/
memset(uds_fd_table, '\0', sizeof(uds_fd_t) * NR_FDS);
}
/* check the permissions of a socket file */
PRIVATE int check_perms(int minor, struct sockaddr_un *addr)
{
int rc;
message vfs_m;
cp_grant_id_t grant_id;
grant_id = cpf_grant_direct(VFS_PROC_NR, (vir_bytes) addr->sun_path,
UNIX_PATH_MAX, CPF_READ | CPF_WRITE);
/* ask the VFS to verify the permissions */
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_CHECK_PERMS;
vfs_m.USER_ENDPT = uds_fd_table[minor].owner;
vfs_m.IO_GRANT = (char *) grant_id;
vfs_m.COUNT = UNIX_PATH_MAX;
rc = sendrec(VFS_PROC_NR, &vfs_m);
cpf_revoke(grant_id);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
printf("(uds) Canonical Path => %s\n", addr->sun_path);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
PRIVATE filp_id_t verify_fd(endpoint_t ep, int fd)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) verify_fd(%d,%d) call_count=%d\n", ep, fd,
++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_VERIFY_FD;
vfs_m.USER_ENDPT = ep;
vfs_m.COUNT = fd;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return NULL;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.ADDRESS;
}
PRIVATE int set_filp(filp_id_t sfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) set_filp(0x%x) call_count=%d\n", sfilp, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_SET_FILP;
vfs_m.ADDRESS = sfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
PRIVATE int copy_filp(endpoint_t to_ep, filp_id_t cfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) copy_filp(%d, 0x%x) call_count=%d\n",to_ep, cfilp,
++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_COPY_FILP;
vfs_m.USER_ENDPT = to_ep;
vfs_m.ADDRESS = cfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type;
}
PRIVATE int put_filp(filp_id_t pfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) put_filp(0x%x) call_count=%d\n", pfilp, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_PUT_FILP;
vfs_m.ADDRESS = pfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
PRIVATE int cancel_fd(endpoint_t ep, int fd)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) cancel_fd(%d,%d) call_count=%d\n", ep, fd, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = PFS_REQ_CANCEL_FD;
vfs_m.USER_ENDPT = ep;
vfs_m.COUNT = fd;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
PUBLIC int perform_connection(message *dev_m_in, message *dev_m_out,
struct sockaddr_un *addr, int minorx, int minory)
{
/* there are several places were a connection is established. */
/* accept(2), connect(2), uds_status(2), socketpair(2) */
/* This is a helper function to make sure it is done in the */
/* same way in each place with the same validation checks. */
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] perform_connection() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
/* only connection oriented types are acceptable and only like
* types can connect to each other
*/
if ((uds_fd_table[minorx].type != SOCK_SEQPACKET &&
uds_fd_table[minorx].type != SOCK_STREAM) ||
uds_fd_table[minorx].type != uds_fd_table[minory].type) {
/* sockets are not in a valid state */
return EINVAL;
}
/* connect the pair of sockets */
uds_fd_table[minorx].peer = minory;
uds_fd_table[minory].peer = minorx;
/* Set the address of both sockets */
memcpy(&(uds_fd_table[minorx].addr), addr, sizeof(struct sockaddr_un));
memcpy(&(uds_fd_table[minory].addr), addr, sizeof(struct sockaddr_un));
return OK;
}
PUBLIC int do_accept(message *dev_m_in, message *dev_m_out)
{
int minor;
int minorparent; /* minor number of parent (server) */
int minorpeer;
int rc, i;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_accept() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
/* Somewhat weird logic is used in this function, so here's an
* overview... The minor number is the server's client socket
* (the socket to be returned by accept()). The data waiting
* for us in the IO Grant is the address that the server is
* listening on. This function uses the address to find the
* server's descriptor. From there we can perform the
* connection or suspend and wait for a connect().
*/
minor = uds_minor(dev_m_in);
if (uds_fd_table[minor].type != -1) {
/* this IOCTL must be called on a 'fresh' socket */
return EINVAL;
}
/* Get the server's address */
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
D);
if (rc != OK) {
return EIO;
}
/* locate server socket */
rc = -1; /* to trap error */
for (i = 0; i < NR_FDS; i++) {
if (uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(addr.sun_path,
uds_fd_table[i].addr.sun_path,
UNIX_PATH_MAX) &&
uds_fd_table[i].listening == 1) {
rc = 0;
break;
}
}
if (rc == -1) {
/* there is no server listening on addr. Maybe someone
* screwed up the ioctl()?
*/
return EINVAL;
}
minorparent = i; /* parent */
/* we are the parent's child */
uds_fd_table[minorparent].child = minor;
/* the peer has the same type as the parent. we need to be that
* type too.
*/
uds_fd_table[minor].type = uds_fd_table[minorparent].type;
/* locate peer to accept in the parent's backlog */
minorpeer = -1; /* to trap error */
for (i = 0; i < uds_fd_table[minorparent].backlog_size; i++) {
if (uds_fd_table[minorparent].backlog[i] != -1) {
minorpeer = uds_fd_table[minorparent].backlog[i];
uds_fd_table[minorparent].backlog[i] = -1;
rc = 0;
break;
}
}
if (minorpeer == -1) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} suspend\n", minor);
#endif
/* there are no peers in the backlog, suspend and wait
* for some to show up
*/
uds_fd_table[minor].suspended = UDS_SUSPENDED_ACCEPT;
return SUSPEND;
}
#if DEBUG == 1
printf("(uds) [%d] connecting to %d -- parent is %d\n", minor,
minorpeer, minorparent);
#endif
rc = perform_connection(dev_m_in, dev_m_out, &addr, minor, minorpeer);
if (rc != OK) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} connection not performed\n",
minor);
#endif
return rc;
}
uds_fd_table[minorparent].child = -1;
/* if peer is blocked on connect() revive peer */
if (uds_fd_table[minorpeer].suspended) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} revive %d\n", minor,
minorpeer);
#endif
uds_fd_table[minorpeer].ready_to_revive = 1;
notify(dev_m_in->m_source);
}
return OK;
}
PUBLIC int do_connect(message *dev_m_in, message *dev_m_out)
{
int minor;
struct sockaddr_un addr;
int rc, i, j;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_connect() call_count=%d\n", uds_minor(dev_m_in),
++call_count);
#endif
minor = uds_minor(dev_m_in);
/* only connection oriented sockets can connect */
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
return EINVAL;
}
if (uds_fd_table[minor].peer != -1) {
/* socket is already connected */
return EISCONN;
}
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &addr,
sizeof(struct sockaddr_un), D);
if (rc != OK) {
return EIO;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
/* permission denied, socket file doesn't exist, etc. */
return rc;
}
/* look for a socket of the same type that is listening on the
* address we want to connect to
*/
for (i = 0; i < NR_FDS; i++) {
if (uds_fd_table[minor].type == uds_fd_table[i].type &&
uds_fd_table[i].listening &&
uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(addr.sun_path, uds_fd_table[i].addr.sun_path,
UNIX_PATH_MAX)) {
if (uds_fd_table[i].child != -1) {
/* the server is blocked on accept(2) --
* perform connection to the child
*/
rc = perform_connection(dev_m_in, dev_m_out,
&addr, minor, uds_fd_table[i].child);
if (rc == OK) {
uds_fd_table[i].child = -1;
#if DEBUG == 1
printf("(uds) [%d] {do_connect} revive %d\n", minor, i);
#endif
/* wake the parent (server) */
uds_fd_table[i].ready_to_revive = 1;
notify(dev_m_in->m_source);
}
return rc;
} else {
#if DEBUG == 1
printf("(uds) [%d] adding to %d's backlog\n",
minor, i);
#endif
/* tell the server were waiting to be served */
/* look for a free slot in the backlog */
rc = -1; /* to trap error */
for (j = 0; j < uds_fd_table[i].backlog_size;
j++) {
if (uds_fd_table[i].backlog[j] == -1) {
uds_fd_table[i].backlog[j] =
minor;
rc = 0;
break;
}
}
if (rc == -1) {
/* backlog is full */
break;
}
/* see if the server is blocked on select() */
if (uds_fd_table[i].selecting == 1) {
/* if the server wants to know
* about data ready to read and
* it doesn't know about it
* already, then let the server
* know we have data for it.
*/
if ((uds_fd_table[i].sel_ops_in &
SEL_RD) &&
!(uds_fd_table[i].sel_ops_out &
SEL_RD)) {
uds_fd_table[i].sel_ops_out |=
SEL_RD;
uds_fd_table[i].status_updated
= 1;
notify(
uds_fd_table[i].select_proc
);
}
}
/* we found our server */
uds_fd_table[minor].peer = i;
/* set the address */
memcpy(&(uds_fd_table[minor].addr), &addr,
sizeof(struct sockaddr_un));
break;
}
}
}
if (uds_fd_table[minor].peer == -1) {
/* could not find another open socket listening on the
* specified address with room in the backlog
*/
return ECONNREFUSED;
}
#if DEBUG == 1
printf("(uds) [%d] {do_connect} suspend\n", minor);
#endif
/* suspend until the server side completes the connection with accept()
*/
uds_fd_table[minor].suspended = UDS_SUSPENDED_CONNECT;
return SUSPEND;
}
PUBLIC int do_listen(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
int backlog_size;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_listen() call_count=%d\n", uds_minor(dev_m_in),
++call_count);
#endif
minor = uds_minor(dev_m_in);
/* ensure the socket has a type and is bound */
if (uds_fd_table[minor].type == -1 ||
uds_fd_table[minor].addr.sun_family != AF_UNIX) {
/* probably trying to call listen() before bind() */
return EINVAL;
}
/* the two supported types for listen(2) are SOCK_STREAM and
* SOCK_SEQPACKET
*/
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
/* probably trying to call listen() with a SOCK_DGRAM */
return EOPNOTSUPP;
}
/* The POSIX standard doesn't say what to do if listen() has
* already been called. Well, there isn't an errno. we silently
* let it happen, but if listen() has already been called, we
* don't allow the backlog to shrink
*/
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &backlog_size, sizeof(int), D);
if (rc != OK) {
return EIO;
}
if (uds_fd_table[minor].listening == 0) {
/* See if backlog_size is between 0 and UDS_SOMAXCONN */
if (backlog_size >= 0 && backlog_size < UDS_SOMAXCONN) {
/* use the user provided backlog_size */
uds_fd_table[minor].backlog_size = backlog_size;
} else {
/* the user gave an invalid size, use
* UDS_SOMAXCONN instead
*/
uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
}
} else {
/* See if the user is trying to expand the backlog_size */
if (backlog_size > uds_fd_table[minor].backlog_size &&
backlog_size < UDS_SOMAXCONN) {
/* expand backlog_size */
uds_fd_table[minor].backlog_size = backlog_size;
}
/* Don't let the user shrink the backlog_size (we might
* have clients waiting in those slots
*/
}
/* perform listen(2) */
uds_fd_table[minor].listening = 1;
return OK;
}
PUBLIC int do_socket(message *dev_m_in, message *dev_m_out)
{
int rc;
int minor;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_socket() call_count=%d\n", uds_minor(dev_m_in),
++call_count);
#endif
minor = uds_minor(dev_m_in);
/* see if this socket already has a type */
if (uds_fd_table[minor].type != -1) {
/* socket type can only be set once */
return EINVAL;
}
/* get the requested type */
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
sizeof(int), D);
if (rc != OK) {
/* something went wrong and we couldn't get the type */
return EIO;
}
/* validate the type */
switch (uds_fd_table[minor].type) {
case SOCK_STREAM:
case SOCK_DGRAM:
case SOCK_SEQPACKET:
/* the type is one of the 3 valid socket types */
return OK;
default:
/* if the type isn't one of the 3 valid socket
* types, then it must be invalid.
*/
/* set the type back to '-1' (no type set) */
uds_fd_table[minor].type = -1;
return EINVAL;
}
}
PUBLIC int do_bind(message *dev_m_in, message *dev_m_out)
{
int minor;
struct sockaddr_un addr;
int rc, i;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_bind() call_count=%d\n", uds_minor(dev_m_in),
++call_count);
#endif
minor = uds_minor(dev_m_in);
if ((uds_fd_table[minor].type == -1) ||
(uds_fd_table[minor].addr.sun_family == AF_UNIX &&
uds_fd_table[minor].type != SOCK_DGRAM)) {
/* the type hasn't been set by do_socket() yet OR attempting
* to re-bind() a non-SOCK_DGRAM socket
*/
return EINVAL;
}
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
D);
if (rc != OK) {
return EIO;
}
/* do some basic sanity checks on the address */
if (addr.sun_family != AF_UNIX) {
/* bad family */
return EAFNOSUPPORT;
}
if (addr.sun_path[0] == '\0') {
/* bad address */
return ENOENT;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
/* permission denied, socket file doesn't exist, etc. */
return rc;
}
/* make sure the address isn't already in use by another socket. */
for (i = 0; i < NR_FDS; i++) {
if ((uds_fd_table[i].addr.sun_family == AF_UNIX) &&
!strncmp(addr.sun_path,
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) {
/* another socket is bound to this sun_path */
return EADDRINUSE;
}
}
/* looks good, perform the bind() */
memcpy(&(uds_fd_table[minor].addr), &addr, sizeof(struct sockaddr_un));
return OK;
}
PUBLIC int do_getsockname(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockname() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
/* Unconditionally send the address we have assigned to this socket.
* The POSIX standard doesn't say what to do if the address
* hasn't been set. If the address isn't currently set, then
* the user will get NULL bytes. Note: libc depends on this
* behavior.
*/
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].addr),
sizeof(struct sockaddr_un), D);
return rc ? EIO : OK;
}
PUBLIC int do_getpeername(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getpeername() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
/* check that the socket is connected with a valid peer */
if (uds_fd_table[minor].peer != -1) {
int peer_minor;
peer_minor = uds_fd_table[minor].peer;
/* copy the address from the peer */
rc = sys_safecopyto(VFS_PROC_NR,
(cp_grant_id_t) dev_m_in->IO_GRANT, (vir_bytes) 0,
(vir_bytes) &(uds_fd_table[peer_minor].addr),
sizeof(struct sockaddr_un), D);
return rc ? EIO : OK;
} else {
if (uds_fd_table[minor].err == ECONNRESET) {
uds_fd_table[minor].err = 0;
return ECONNRESET;
} else {
return ENOTCONN;
}
}
}
PUBLIC int do_shutdown(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc, how;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_shutdown() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
/* socket must be a connection oriented socket */
return EINVAL;
}
if (uds_fd_table[minor].peer == -1) {
/* shutdown(2) is only valid for connected sockets */
if (uds_fd_table[minor].err == ECONNRESET) {
return ECONNRESET;
} else {
return ENOTCONN;
}
}
/* get the 'how' parameter from the process */
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &how, sizeof(int), D);
if (rc != OK) {
return EIO;
}
switch (how) {
case SHUT_RD:
/* take away read permission */
uds_fd_table[minor].mode =
uds_fd_table[minor].mode ^ S_IRUSR;
break;
case SHUT_WR:
/* take away write permission */
uds_fd_table[minor].mode =
uds_fd_table[minor].mode ^ S_IWUSR;
break;
case SHUT_RDWR:
/* completely shutdown */
uds_fd_table[minor].mode = 0;
break;
default:
/* the 'how' parameter is invalid */
return EINVAL;
}
return OK;
}
PUBLIC int do_socketpair(message *dev_m_in, message *dev_m_out)
{
int rc;
dev_t minorin;
int minorx, minory;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_socketpair() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
/* first ioctl param is the first socket */
minorx = uds_minor(dev_m_in);
/* third ioctl param is the minor number of the second socket */
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &minorin, sizeof(dev_t), D);
if (rc != OK) {
return EIO;
}
minory = (minor(minorin) & BYTE);
#if DEBUG == 1
printf("socketpair() %d - %d\n", minorx, minory);
#endif
/* security check - both sockets must have the same endpoint (owner) */
if (uds_fd_table[minorx].owner != uds_fd_table[minory].owner) {
/* we won't allow you to magically connect your socket to
* someone elses socket
*/
return EPERM;
}
addr.sun_family = AF_UNIX;
addr.sun_path[0] = 'X';
addr.sun_path[1] = '\0';
uds_fd_table[minorx].syscall_done = 1;
return perform_connection(dev_m_in, dev_m_out, &addr, minorx, minory);
}
PUBLIC int do_getsockopt_sotype(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_sotype() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
if (uds_fd_table[minor].type == -1) {
/* the type hasn't been set yet. instead of returning an
* invalid type, we fail with EINVAL
*/
return EINVAL;
}
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].type),
sizeof(int), D);
return rc ? EIO : OK;
}
PUBLIC int do_getsockopt_peercred(message *dev_m_in, message *dev_m_out)
{
int minor;
int peer_minor;
int rc;
struct ucred cred;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_peercred() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
if (uds_fd_table[minor].peer == -1) {
if (uds_fd_table[minor].err == ECONNRESET) {
uds_fd_table[minor].err = 0;
return ECONNRESET;
} else {
return ENOTCONN;
}
}
peer_minor = uds_fd_table[minor].peer;
/* obtain the peer's credentials */
rc = getnucred(uds_fd_table[peer_minor].owner, &cred);
if (rc == -1) {
/* likely error: invalid endpoint / proc doesn't exist */
return errno;
}
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &cred, sizeof(struct ucred), D);
return rc ? EIO : OK;
}
int do_getsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
size_t sndbuf = PIPE_BUF;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_sndbuf() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(sndbuf),
sizeof(size_t), D);
return rc ? EIO : OK;
}
int do_setsockopt_sndbuf(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
size_t sndbuf;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &sndbuf,
sizeof(size_t), D);
if (rc != OK) {
return EIO;
}
if (sndbuf > PIPE_BUF) {
/* The send buffer is limited to 32K at the moment. */
return ENOSYS;
}
/* There is no way to reduce the send buffer, do we have to
* let this call fail for smaller buffers?
*/
return OK;
}
int do_getsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
size_t rcvbuf = PIPE_BUF;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_rcvbuf() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(rcvbuf),
sizeof(size_t), D);
return rc ? EIO : OK;
}
int do_setsockopt_rcvbuf(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
size_t rcvbuf;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &rcvbuf,
sizeof(size_t), D);
if (rc != OK) {
return EIO;
}
if (rcvbuf > PIPE_BUF) {
/* The send buffer is limited to 32K at the moment. */
return ENOSYS;
}
/* There is no way to reduce the send buffer, do we have to
* let this call fail for smaller buffers?
*/
return OK;
}
PUBLIC int do_sendto(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendto() call_count=%d\n", uds_minor(dev_m_in),
++call_count);
#endif
minor = uds_minor(dev_m_in);
if (uds_fd_table[minor].type != SOCK_DGRAM) {
/* This IOCTL is only for SOCK_DGRAM sockets */
return EINVAL;
}
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &addr, sizeof(struct sockaddr_un),
D);
if (rc != OK) {
return EIO;
}
/* do some basic sanity checks on the address */
if (addr.sun_family != AF_UNIX || addr.sun_path[0] == '\0') {
/* bad address */
return EINVAL;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
return rc;
}
memcpy(&(uds_fd_table[minor].target), &addr,
sizeof(struct sockaddr_un));
return OK;
}
PUBLIC int do_recvfrom(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_recvfrom() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &(uds_fd_table[minor].source),
sizeof(struct sockaddr_un), D);
return rc ? EIO : OK;
}
int msg_control_read(struct msg_control *msg_ctrl, struct ancillary *data,
int minor)
{
int rc;
struct msghdr msghdr;
struct cmsghdr *cmsg = NULL;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] msg_control_read() call_count=%d\n", minor,
++call_count);
#endif
data->nfiledes = 0;
memset(&msghdr, '\0', sizeof(struct msghdr));
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
for(cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_RIGHTS) {
int i;
int nfds =
MIN((cmsg->cmsg_len-CMSG_LEN(0))/sizeof(int),
OPEN_MAX);
for (i = 0; i < nfds; i++) {
if (data->nfiledes == OPEN_MAX) {
return EOVERFLOW;
}
data->fds[data->nfiledes] =
((int *) CMSG_DATA(cmsg))[i];
#if DEBUG == 1
printf("(uds) [%d] fd[%d]=%d\n", minor,
data->nfiledes, data->fds[data->nfiledes]);
#endif
data->nfiledes++;
}
}
}
/* obtain this socket's credentials */
rc = getnucred(uds_fd_table[minor].owner, &(data->cred));
if (rc == -1) {
return errno;
}
#if DEBUG == 1
printf("(uds) [%d] cred={%d,%d,%d}\n", minor,
data->cred.pid, data->cred.uid,
data->cred.gid);
#endif
return OK;
}
PRIVATE int send_fds(int minor, struct ancillary *data)
{
int rc, i, j;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] send_fds() call_count=%d\n", minor, ++call_count);
#endif
/* verify the file descriptors and get their filps. */
for (i = 0; i < data->nfiledes; i++) {
data->filps[i] = verify_fd(uds_fd_table[minor].owner,
data->fds[i]);
if (data->filps[i] == NULL) {
return EINVAL;
}
}
/* set them as in-flight */
for (i = 0; i < data->nfiledes; i++) {
rc = set_filp(data->filps[i]);
if (rc != OK) {
/* revert set_filp() calls */
for (j = i; j >= 0; j--) {
put_filp(data->filps[j]);
}
return rc;
}
}
return OK;
}
PUBLIC int clear_fds(int minor, struct ancillary *data)
{
/* This function calls put_filp() for all of the FDs in data.
* This is used when a Unix Domain Socket is closed and there
* exists references to file descriptors that haven't been received
* with recvmsg().
*/
int i;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] recv_fds() call_count=%d\n", minor,
++call_count);
#endif
for (i = 0; i < data->nfiledes; i++) {
put_filp(data->filps[i]);
#if DEBUG == 1
printf("(uds) clear_fds() => %d\n", data->fds[i]);
#endif
data->fds[i] = -1;
data->filps[i] = NULL;
}
data->nfiledes = 0;
return OK;
}
PRIVATE int recv_fds(int minor, struct ancillary *data,
struct msg_control *msg_ctrl)
{
int rc, i, j;
struct msghdr msghdr;
struct cmsghdr *cmsg;
endpoint_t to_ep;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] recv_fds() call_count=%d\n", minor,
++call_count);
#endif
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
cmsg = CMSG_FIRSTHDR(&msghdr);
cmsg->cmsg_len = CMSG_LEN(sizeof(int) * data->nfiledes);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
to_ep = uds_fd_table[minor].owner;
/* copy to the target endpoint */
for (i = 0; i < data->nfiledes; i++) {
rc = copy_filp(to_ep, data->filps[i]);
if (rc < 0) {
/* revert set_filp() calls */
for (j = 0; j < data->nfiledes; j++) {
put_filp(data->filps[j]);
}
/* revert copy_filp() calls */
for (j = i; j >= 0; j--) {
cancel_fd(to_ep, data->fds[j]);
}
return rc;
}
data->fds[i] = rc; /* data->fds[i] now has the new FD */
}
for (i = 0; i < data->nfiledes; i++) {
put_filp(data->filps[i]);
#if DEBUG == 1
printf("(uds) recv_fds() => %d\n", data->fds[i]);
#endif
((int *)CMSG_DATA(cmsg))[i] = data->fds[i];
data->fds[i] = -1;
data->filps[i] = NULL;
}
data->nfiledes = 0;
return OK;
}
PRIVATE int recv_cred(int minor, struct ancillary *data,
struct msg_control *msg_ctrl)
{
struct msghdr msghdr;
struct cmsghdr *cmsg;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] recv_cred() call_count=%d\n", minor,
++call_count);
#endif
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
cmsg = CMSG_FIRSTHDR(&msghdr);
if (cmsg->cmsg_len > 0) {
cmsg = CMSG_NXTHDR(&msghdr, cmsg);
}
cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_CREDENTIALS;
memcpy(CMSG_DATA(cmsg), &(data->cred), sizeof(struct ucred));
return OK;
}
PUBLIC int do_sendmsg(message *dev_m_in, message *dev_m_out)
{
int minor, peer, rc, i;
struct msg_control msg_ctrl;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendmsg() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
memset(&msg_ctrl, '\0', sizeof(struct msg_control));
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control), D);
if (rc != OK) {
return EIO;
}
/* locate peer */
peer = -1;
if (uds_fd_table[minor].type == SOCK_DGRAM) {
if (uds_fd_table[minor].target.sun_path[0] == '\0' ||
uds_fd_table[minor].target.sun_family != AF_UNIX) {
return EDESTADDRREQ;
}
for (i = 0; i < NR_FDS; i++) {
/* look for a SOCK_DGRAM socket that is bound on
* the target address
*/
if (uds_fd_table[i].type == SOCK_DGRAM &&
uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(uds_fd_table[minor].target.sun_path,
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)){
peer = i;
break;
}
}
if (peer == -1) {
return ENOENT;
}
} else {
peer = uds_fd_table[minor].peer;
if (peer == -1) {
return ENOTCONN;
}
}
#if DEBUG == 1
printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer);
#endif
/* note: it's possible that there is already some file
* descriptors in ancillary_data if the peer didn't call
* recvmsg() yet. That's okay. The receiver will
* get the current file descriptors plus the new ones.
*/
rc = msg_control_read(&msg_ctrl, &uds_fd_table[peer].ancillary_data,
minor);
if (rc != OK) {
return rc;
}
return send_fds(minor, &uds_fd_table[peer].ancillary_data);
}
PUBLIC int do_recvmsg(message *dev_m_in, message *dev_m_out)
{
int minor;
int rc;
struct msg_control msg_ctrl;
socklen_t controllen_avail = 0;
socklen_t controllen_needed = 0;
socklen_t controllen_desired = 0;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendmsg() call_count=%d\n",
uds_minor(dev_m_in), ++call_count);
#endif
minor = uds_minor(dev_m_in);
#if DEBUG == 1
printf("(uds) [%d] CREDENTIALS {pid:%d,uid:%d,gid:%d}\n", minor,
uds_fd_table[minor].ancillary_data.cred.pid,
uds_fd_table[minor].ancillary_data.cred.uid,
uds_fd_table[minor].ancillary_data.cred.gid);
#endif
memset(&msg_ctrl, '\0', sizeof(struct msg_control));
/* get the msg_control from the user, it will include the
* amount of space the user has allocated for control data.
*/
rc = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control), D);
if (rc != OK) {
return EIO;
}
controllen_avail = MIN(msg_ctrl.msg_controllen, MSG_CONTROL_MAX);
if (uds_fd_table[minor].ancillary_data.nfiledes > 0) {
controllen_needed = CMSG_LEN(sizeof(int) *
(uds_fd_table[minor].ancillary_data.nfiledes));
}
/* if there is room we also include credentials */
controllen_desired = controllen_needed +
CMSG_LEN(sizeof(struct ucred));
if (controllen_needed > controllen_avail) {
return EOVERFLOW;
}
rc = recv_fds(minor, &uds_fd_table[minor].ancillary_data, &msg_ctrl);
if (rc != OK) {
return rc;
}
if (controllen_desired <= controllen_avail) {
rc = recv_cred(minor, &uds_fd_table[minor].ancillary_data,
&msg_ctrl);
if (rc != OK) {
return rc;
}
}
/* send the user the control data */
rc = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) dev_m_in->IO_GRANT,
(vir_bytes) 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control), D);
return rc ? EIO : OK;
}