minix/servers/pfs/uds.c
David van Moolenbroek 665198b4c2 Rewrite character driver protocol
As a side effect, remove the clone style, as the normal device style
supports device cloning now.

Change-Id: Ie82d1ef0385514a04a8faa139129a617895780b5
2014-03-01 09:04:52 +01:00

1543 lines
34 KiB
C

/*
* Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
* This code handles ioctl(2) commands to implement the socket API.
* Some helper functions are also present.
*
* The entry points into this file are...
*
* uds_init: initialize the descriptor table.
* uds_do_ioctl: process an IOCTL request.
* uds_clear_fds: calls put_filp for undelivered FDs.
*
* Also see...
*
* dev_uds.c, uds.h
*/
#define DEBUG 0
#include "inc.h"
#include "const.h"
#include "glo.h"
#include "uds.h"
/* File Descriptor Table */
uds_fd_t uds_fd_table[NR_FDS];
/* initialize the descriptor table */
void uds_init(void)
{
/*
* Setting everything to NULL implicitly sets the
* state to UDS_FREE.
*/
memset(uds_fd_table, '\0', sizeof(uds_fd_t) * NR_FDS);
}
/* check the permissions of a socket file */
static int check_perms(devminor_t minor, struct sockaddr_un *addr)
{
int rc;
message vfs_m;
cp_grant_id_t grant_id;
grant_id = cpf_grant_direct(VFS_PROC_NR, (vir_bytes) addr->sun_path,
UNIX_PATH_MAX, CPF_READ | CPF_WRITE);
/* ask the VFS to verify the permissions */
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_CHECK_PERMS;
vfs_m.VFS_PFS_ENDPT = uds_fd_table[minor].owner;
vfs_m.VFS_PFS_GRANT = grant_id;
vfs_m.VFS_PFS_COUNT = UNIX_PATH_MAX;
rc = sendrec(VFS_PROC_NR, &vfs_m);
cpf_revoke(grant_id);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
printf("(uds) Canonical Path => %s\n", addr->sun_path);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
static filp_id_t verify_fd(endpoint_t ep, int fd)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) verify_fd(%d,%d) call_count=%d\n", ep, fd,
++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_VERIFY_FD;
vfs_m.VFS_PFS_ENDPT = ep;
vfs_m.VFS_PFS_FD = fd;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return NULL;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.VFS_PFS_FILP;
}
static int set_filp(filp_id_t sfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) set_filp(%p) call_count=%d\n", sfilp, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_SET_FILP;
vfs_m.VFS_PFS_FILP = sfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
static int copy_filp(endpoint_t to_ep, filp_id_t cfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) copy_filp(%d, %p) call_count=%d\n",to_ep, cfilp,
++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_COPY_FILP;
vfs_m.VFS_PFS_ENDPT = to_ep;
vfs_m.VFS_PFS_FILP = cfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type;
}
static int put_filp(filp_id_t pfilp)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) put_filp(%p) call_count=%d\n", pfilp, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_PUT_FILP;
vfs_m.VFS_PFS_FILP = pfilp;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
static int cancel_fd(endpoint_t ep, int fd)
{
int rc;
message vfs_m;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) cancel_fd(%d,%d) call_count=%d\n", ep, fd, ++call_count);
#endif
memset(&vfs_m, '\0', sizeof(message));
vfs_m.m_type = VFS_PFS_CANCEL_FD;
vfs_m.VFS_PFS_ENDPT = ep;
vfs_m.VFS_PFS_FD = fd;
rc = sendrec(VFS_PROC_NR, &vfs_m);
if (OK != rc) {
printf("(uds) sendrec error... req_nr: %d err: %d\n",
vfs_m.m_type, rc);
return EIO;
}
#if DEBUG == 1
printf("(uds) VFS reply => %d\n", vfs_m.m_type);
#endif
return vfs_m.m_type; /* return reply code OK, ELOOP, etc. */
}
static int perform_connection(devminor_t minorx, devminor_t minory,
struct sockaddr_un *addr)
{
/* there are several places were a connection is established. */
/* accept(2), connect(2), uds_status(2), socketpair(2) */
/* This is a helper function to make sure it is done in the */
/* same way in each place with the same validation checks. */
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] perform_connection() call_count=%d\n", minorx,
++call_count);
#endif
/* only connection oriented types are acceptable and only like
* types can connect to each other
*/
if ((uds_fd_table[minorx].type != SOCK_SEQPACKET &&
uds_fd_table[minorx].type != SOCK_STREAM) ||
uds_fd_table[minorx].type != uds_fd_table[minory].type) {
/* sockets are not in a valid state */
return EINVAL;
}
/* connect the pair of sockets */
uds_fd_table[minorx].peer = minory;
uds_fd_table[minory].peer = minorx;
/* Set the address of both sockets */
memcpy(&(uds_fd_table[minorx].addr), addr, sizeof(struct sockaddr_un));
memcpy(&(uds_fd_table[minory].addr), addr, sizeof(struct sockaddr_un));
return OK;
}
static int do_accept(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
devminor_t minorparent; /* minor number of parent (server) */
devminor_t minorpeer;
int rc, i;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_accept() call_count=%d\n", minor, ++call_count);
#endif
/* Somewhat weird logic is used in this function, so here's an
* overview... The minor number is the server's client socket
* (the socket to be returned by accept()). The data waiting
* for us in the IO Grant is the address that the server is
* listening on. This function uses the address to find the
* server's descriptor. From there we can perform the
* connection or suspend and wait for a connect().
*/
if (uds_fd_table[minor].type != -1) {
/* this IOCTL must be called on a 'fresh' socket */
return EINVAL;
}
/* Get the server's address */
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &addr,
sizeof(struct sockaddr_un));
if (rc != OK) {
return EIO;
}
/* locate server socket */
rc = -1; /* to trap error */
for (i = 0; i < NR_FDS; i++) {
if (uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(addr.sun_path,
uds_fd_table[i].addr.sun_path,
UNIX_PATH_MAX) &&
uds_fd_table[i].listening == 1) {
rc = 0;
break;
}
}
if (rc == -1) {
/* there is no server listening on addr. Maybe someone
* screwed up the ioctl()?
*/
return EINVAL;
}
minorparent = i; /* parent */
/* we are the parent's child */
uds_fd_table[minorparent].child = minor;
/* the peer has the same type as the parent. we need to be that
* type too.
*/
uds_fd_table[minor].type = uds_fd_table[minorparent].type;
/* locate peer to accept in the parent's backlog */
minorpeer = -1; /* to trap error */
for (i = 0; i < uds_fd_table[minorparent].backlog_size; i++) {
if (uds_fd_table[minorparent].backlog[i] != -1) {
minorpeer = uds_fd_table[minorparent].backlog[i];
uds_fd_table[minorparent].backlog[i] = -1;
rc = 0;
break;
}
}
if (minorpeer == -1) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} suspend\n", minor);
#endif
/* there are no peers in the backlog, suspend and wait
* for some to show up
*/
uds_fd_table[minor].suspended = UDS_SUSPENDED_ACCEPT;
return EDONTREPLY;
}
#if DEBUG == 1
printf("(uds) [%d] connecting to %d -- parent is %d\n", minor,
minorpeer, minorparent);
#endif
rc = perform_connection(minor, minorpeer, &addr);
if (rc != OK) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} connection not performed\n",
minor);
#endif
return rc;
}
uds_fd_table[minorparent].child = -1;
/* if peer is blocked on connect() revive peer */
if (uds_fd_table[minorpeer].suspended) {
#if DEBUG == 1
printf("(uds) [%d] {do_accept} revive %d\n", minor,
minorpeer);
#endif
uds_unsuspend(minorpeer);
}
return OK;
}
static int do_connect(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int child;
struct sockaddr_un addr;
int rc, i, j;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_connect() call_count=%d\n", minor, ++call_count);
#endif
/* only connection oriented sockets can connect */
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
return EINVAL;
}
if (uds_fd_table[minor].peer != -1) {
/* socket is already connected */
return EISCONN;
}
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &addr,
sizeof(struct sockaddr_un));
if (rc != OK) {
return EIO;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
/* permission denied, socket file doesn't exist, etc. */
return rc;
}
/* look for a socket of the same type that is listening on the
* address we want to connect to
*/
for (i = 0; i < NR_FDS; i++) {
if (uds_fd_table[minor].type == uds_fd_table[i].type &&
uds_fd_table[i].listening &&
uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(addr.sun_path, uds_fd_table[i].addr.sun_path,
UNIX_PATH_MAX)) {
if ((child = uds_fd_table[i].child) != -1) {
/* the server is blocked on accept(2) --
* perform connection to the child
*/
rc = perform_connection(minor, child, &addr);
if (rc == OK) {
uds_fd_table[i].child = -1;
#if DEBUG == 1
printf("(uds) [%d] {do_connect} revive %d\n", minor, child);
#endif
/* wake the parent (server) */
uds_unsuspend(child);
}
return rc;
} else {
#if DEBUG == 1
printf("(uds) [%d] adding to %d's backlog\n",
minor, i);
#endif
/* tell the server were waiting to be served */
/* look for a free slot in the backlog */
rc = -1; /* to trap error */
for (j = 0; j < uds_fd_table[i].backlog_size;
j++) {
if (uds_fd_table[i].backlog[j] == -1) {
uds_fd_table[i].backlog[j] =
minor;
rc = 0;
break;
}
}
if (rc == -1) {
/* backlog is full */
break;
}
/* see if the server is blocked on select() */
if (uds_fd_table[i].sel_ops & CDEV_OP_RD) {
/* if the server wants to know about
* data ready to read and it doesn't
* doesn't know about it already, then
* let the server know we have data for
* it.
*/
chardriver_reply_select(
uds_fd_table[i].sel_endpt, i,
CDEV_OP_RD);
uds_fd_table[i].sel_ops &= ~CDEV_OP_RD;
}
/* we found our server */
uds_fd_table[minor].peer = i;
/* set the address */
memcpy(&(uds_fd_table[minor].addr), &addr,
sizeof(struct sockaddr_un));
break;
}
}
}
if (uds_fd_table[minor].peer == -1) {
/* could not find another open socket listening on the
* specified address with room in the backlog
*/
return ECONNREFUSED;
}
#if DEBUG == 1
printf("(uds) [%d] {do_connect} suspend\n", minor);
#endif
/* suspend until the server side completes the connection with accept()
*/
uds_fd_table[minor].suspended = UDS_SUSPENDED_CONNECT;
return EDONTREPLY;
}
static int do_listen(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc;
int backlog_size;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_listen() call_count=%d\n", minor, ++call_count);
#endif
/* ensure the socket has a type and is bound */
if (uds_fd_table[minor].type == -1 ||
uds_fd_table[minor].addr.sun_family != AF_UNIX) {
/* probably trying to call listen() before bind() */
return EINVAL;
}
/* the two supported types for listen(2) are SOCK_STREAM and
* SOCK_SEQPACKET
*/
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
/* probably trying to call listen() with a SOCK_DGRAM */
return EOPNOTSUPP;
}
/* The POSIX standard doesn't say what to do if listen() has
* already been called. Well, there isn't an errno. we silently
* let it happen, but if listen() has already been called, we
* don't allow the backlog to shrink
*/
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &backlog_size,
sizeof(int));
if (rc != OK) {
return EIO;
}
if (uds_fd_table[minor].listening == 0) {
/* See if backlog_size is between 0 and UDS_SOMAXCONN */
if (backlog_size >= 0 && backlog_size < UDS_SOMAXCONN) {
/* use the user provided backlog_size */
uds_fd_table[minor].backlog_size = backlog_size;
} else {
/* the user gave an invalid size, use
* UDS_SOMAXCONN instead
*/
uds_fd_table[minor].backlog_size = UDS_SOMAXCONN;
}
} else {
/* See if the user is trying to expand the backlog_size */
if (backlog_size > uds_fd_table[minor].backlog_size &&
backlog_size < UDS_SOMAXCONN) {
/* expand backlog_size */
uds_fd_table[minor].backlog_size = backlog_size;
}
/* Don't let the user shrink the backlog_size (we might
* have clients waiting in those slots
*/
}
/* perform listen(2) */
uds_fd_table[minor].listening = 1;
return OK;
}
static int do_socket(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_socket() call_count=%d\n", minor, ++call_count);
#endif
/* see if this socket already has a type */
if (uds_fd_table[minor].type != -1) {
/* socket type can only be set once */
return EINVAL;
}
/* get the requested type */
rc = sys_safecopyfrom(endpt, grant, 0,
(vir_bytes) &uds_fd_table[minor].type, sizeof(int));
if (rc != OK) {
/* something went wrong and we couldn't get the type */
return EIO;
}
/* validate the type */
switch (uds_fd_table[minor].type) {
case SOCK_STREAM:
case SOCK_DGRAM:
case SOCK_SEQPACKET:
/* the type is one of the 3 valid socket types */
return OK;
default:
/* if the type isn't one of the 3 valid socket
* types, then it must be invalid.
*/
/* set the type back to '-1' (no type set) */
uds_fd_table[minor].type = -1;
return EINVAL;
}
}
static int do_bind(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
struct sockaddr_un addr;
int rc, i;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_bind() call_count=%d\n", minor, ++call_count);
#endif
if ((uds_fd_table[minor].type == -1) ||
(uds_fd_table[minor].addr.sun_family == AF_UNIX &&
uds_fd_table[minor].type != SOCK_DGRAM)) {
/* the type hasn't been set by do_socket() yet OR attempting
* to re-bind() a non-SOCK_DGRAM socket
*/
return EINVAL;
}
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &addr,
sizeof(struct sockaddr_un));
if (rc != OK) {
return EIO;
}
/* do some basic sanity checks on the address */
if (addr.sun_family != AF_UNIX) {
/* bad family */
return EAFNOSUPPORT;
}
if (addr.sun_path[0] == '\0') {
/* bad address */
return ENOENT;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
/* permission denied, socket file doesn't exist, etc. */
return rc;
}
/* make sure the address isn't already in use by another socket. */
for (i = 0; i < NR_FDS; i++) {
if ((uds_fd_table[i].addr.sun_family == AF_UNIX) &&
!strncmp(addr.sun_path,
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) {
/* another socket is bound to this sun_path */
return EADDRINUSE;
}
}
/* looks good, perform the bind() */
memcpy(&(uds_fd_table[minor].addr), &addr, sizeof(struct sockaddr_un));
return OK;
}
static int do_getsockname(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockname() call_count=%d\n", minor,
++call_count);
#endif
/* Unconditionally send the address we have assigned to this socket.
* The POSIX standard doesn't say what to do if the address
* hasn't been set. If the address isn't currently set, then
* the user will get NULL bytes. Note: libc depends on this
* behavior.
*/
rc = sys_safecopyto(endpt, grant, 0,
(vir_bytes) &uds_fd_table[minor].addr,
sizeof(struct sockaddr_un));
return rc ? EIO : OK;
}
static int do_getpeername(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getpeername() call_count=%d\n", minor,
++call_count);
#endif
/* check that the socket is connected with a valid peer */
if (uds_fd_table[minor].peer != -1) {
int peer_minor;
peer_minor = uds_fd_table[minor].peer;
/* copy the address from the peer */
rc = sys_safecopyto(endpt, grant, 0,
(vir_bytes) &uds_fd_table[peer_minor].addr,
sizeof(struct sockaddr_un));
return rc ? EIO : OK;
} else {
if (uds_fd_table[minor].err == ECONNRESET) {
uds_fd_table[minor].err = 0;
return ECONNRESET;
} else {
return ENOTCONN;
}
}
}
static int do_shutdown(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc, how;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_shutdown() call_count=%d\n", minor,
++call_count);
#endif
if (uds_fd_table[minor].type != SOCK_STREAM &&
uds_fd_table[minor].type != SOCK_SEQPACKET) {
/* socket must be a connection oriented socket */
return EINVAL;
}
if (uds_fd_table[minor].peer == -1) {
/* shutdown(2) is only valid for connected sockets */
if (uds_fd_table[minor].err == ECONNRESET) {
return ECONNRESET;
} else {
return ENOTCONN;
}
}
/* get the 'how' parameter from the process */
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &how, sizeof(int));
if (rc != OK) {
return EIO;
}
switch (how) {
case SHUT_RD:
/* take away read permission */
uds_fd_table[minor].mode &= ~S_IRUSR;
break;
case SHUT_WR:
/* take away write permission */
uds_fd_table[minor].mode &= ~S_IWUSR;
break;
case SHUT_RDWR:
/* completely shutdown */
uds_fd_table[minor].mode = 0;
break;
default:
/* the 'how' parameter is invalid */
return EINVAL;
}
return OK;
}
static int do_socketpair(devminor_t minorx, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
dev_t minorin;
devminor_t minory;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_socketpair() call_count=%d\n", minorx,
++call_count);
#endif
/* ioctl argument is the minor number of the second socket */
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &minorin,
sizeof(dev_t));
if (rc != OK) {
return EIO;
}
minory = minor(minorin);
#if DEBUG == 1
printf("socketpair() %d - %d\n", minorx, minory);
#endif
/* security check - both sockets must have the same endpoint (owner) */
if (uds_fd_table[minorx].owner != uds_fd_table[minory].owner) {
/* we won't allow you to magically connect your socket to
* someone elses socket
*/
return EPERM;
}
addr.sun_family = AF_UNIX;
addr.sun_path[0] = 'X';
addr.sun_path[1] = '\0';
return perform_connection(minorx, minory, &addr);
}
static int do_getsockopt_sotype(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_sotype() call_count=%d\n", minor,
++call_count);
#endif
if (uds_fd_table[minor].type == -1) {
/* the type hasn't been set yet. instead of returning an
* invalid type, we fail with EINVAL
*/
return EINVAL;
}
rc = sys_safecopyto(endpt, grant, 0,
(vir_bytes) &uds_fd_table[minor].type, sizeof(int));
return rc ? EIO : OK;
}
static int do_getsockopt_peercred(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int peer_minor;
int rc;
struct uucred cred;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_peercred() call_count=%d\n", minor,
++call_count);
#endif
if (uds_fd_table[minor].peer == -1) {
if (uds_fd_table[minor].err == ECONNRESET) {
uds_fd_table[minor].err = 0;
return ECONNRESET;
} else {
return ENOTCONN;
}
}
peer_minor = uds_fd_table[minor].peer;
/* obtain the peer's credentials */
rc = getnucred(uds_fd_table[peer_minor].owner, &cred);
if (rc == -1) {
/* likely error: invalid endpoint / proc doesn't exist */
return errno;
}
rc = sys_safecopyto(endpt, grant, 0, (vir_bytes) &cred,
sizeof(struct uucred));
return rc ? EIO : OK;
}
static int do_getsockopt_sndbuf(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
size_t sndbuf = PIPE_BUF;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_sndbuf() call_count=%d\n", minor,
++call_count);
#endif
rc = sys_safecopyto(endpt, grant, 0, (vir_bytes) &sndbuf,
sizeof(size_t));
return rc ? EIO : OK;
}
static int do_setsockopt_sndbuf(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
size_t sndbuf;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n", minor,
++call_count);
#endif
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &sndbuf,
sizeof(size_t));
if (rc != OK) {
return EIO;
}
if (sndbuf > PIPE_BUF) {
/* The send buffer is limited to 32K at the moment. */
return ENOSYS;
}
/* There is no way to reduce the send buffer, do we have to
* let this call fail for smaller buffers?
*/
return OK;
}
static int do_getsockopt_rcvbuf(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
size_t rcvbuf = PIPE_BUF;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_getsockopt_rcvbuf() call_count=%d\n", minor,
++call_count);
#endif
rc = sys_safecopyto(endpt, grant, 0, (vir_bytes) &rcvbuf,
sizeof(size_t));
return rc ? EIO : OK;
}
static int do_setsockopt_rcvbuf(devminor_t minor, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
size_t rcvbuf;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_setsockopt_rcvbuf() call_count=%d\n", minor,
++call_count);
#endif
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &rcvbuf,
sizeof(size_t));
if (rc != OK) {
return EIO;
}
if (rcvbuf > PIPE_BUF) {
/* The send buffer is limited to 32K at the moment. */
return ENOSYS;
}
/* There is no way to reduce the send buffer, do we have to
* let this call fail for smaller buffers?
*/
return OK;
}
static int do_sendto(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc;
struct sockaddr_un addr;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendto() call_count=%d\n", minor, ++call_count);
#endif
if (uds_fd_table[minor].type != SOCK_DGRAM) {
/* This IOCTL is only for SOCK_DGRAM sockets */
return EINVAL;
}
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &addr,
sizeof(struct sockaddr_un));
if (rc != OK) {
return EIO;
}
/* do some basic sanity checks on the address */
if (addr.sun_family != AF_UNIX || addr.sun_path[0] == '\0') {
/* bad address */
return EINVAL;
}
rc = check_perms(minor, &addr);
if (rc != OK) {
return rc;
}
memcpy(&(uds_fd_table[minor].target), &addr,
sizeof(struct sockaddr_un));
return OK;
}
static int do_recvfrom(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_recvfrom() call_count=%d\n", minor,
++call_count);
#endif
rc = sys_safecopyto(endpt, grant, 0,
(vir_bytes) &uds_fd_table[minor].source,
sizeof(struct sockaddr_un));
return rc ? EIO : OK;
}
static int msg_control_read(struct msg_control *msg_ctrl,
struct ancillary *data, devminor_t minor)
{
int rc;
struct msghdr msghdr;
struct cmsghdr *cmsg = NULL;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] msg_control_read() call_count=%d\n", minor,
++call_count);
#endif
data->nfiledes = 0;
memset(&msghdr, '\0', sizeof(struct msghdr));
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
for(cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_RIGHTS) {
int i;
int nfds =
MIN((cmsg->cmsg_len-CMSG_LEN(0))/sizeof(int),
OPEN_MAX);
for (i = 0; i < nfds; i++) {
if (data->nfiledes == OPEN_MAX) {
return EOVERFLOW;
}
data->fds[data->nfiledes] =
((int *) CMSG_DATA(cmsg))[i];
#if DEBUG == 1
printf("(uds) [%d] fd[%d]=%d\n", minor,
data->nfiledes, data->fds[data->nfiledes]);
#endif
data->nfiledes++;
}
}
}
/* obtain this socket's credentials */
rc = getnucred(uds_fd_table[minor].owner, &(data->cred));
if (rc == -1) {
return errno;
}
#if DEBUG == 1
printf("(uds) [%d] cred={%d,%d,%d}\n", minor,
data->cred.pid, data->cred.uid,
data->cred.gid);
#endif
return OK;
}
static int send_fds(devminor_t minor, struct ancillary *data)
{
int rc, i, j;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] send_fds() call_count=%d\n", minor, ++call_count);
#endif
/* verify the file descriptors and get their filps. */
for (i = 0; i < data->nfiledes; i++) {
data->filps[i] = verify_fd(uds_fd_table[minor].owner,
data->fds[i]);
if (data->filps[i] == NULL) {
return EINVAL;
}
}
/* set them as in-flight */
for (i = 0; i < data->nfiledes; i++) {
rc = set_filp(data->filps[i]);
if (rc != OK) {
/* revert set_filp() calls */
for (j = i; j >= 0; j--) {
put_filp(data->filps[j]);
}
return rc;
}
}
return OK;
}
int uds_clear_fds(devminor_t minor, struct ancillary *data)
{
/* This function calls put_filp() for all of the FDs in data.
* This is used when a Unix Domain Socket is closed and there
* exists references to file descriptors that haven't been received
* with recvmsg().
*/
int i;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] uds_clear_fds() call_count=%d\n", minor,
++call_count);
#endif
for (i = 0; i < data->nfiledes; i++) {
put_filp(data->filps[i]);
#if DEBUG == 1
printf("(uds) uds_clear_fds() => %d\n", data->fds[i]);
#endif
data->fds[i] = -1;
data->filps[i] = NULL;
}
data->nfiledes = 0;
return OK;
}
static int recv_fds(devminor_t minor, struct ancillary *data,
struct msg_control *msg_ctrl)
{
int rc, i, j;
struct msghdr msghdr;
struct cmsghdr *cmsg;
endpoint_t to_ep;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] recv_fds() call_count=%d\n", minor,
++call_count);
#endif
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
cmsg = CMSG_FIRSTHDR(&msghdr);
cmsg->cmsg_len = CMSG_LEN(sizeof(int) * data->nfiledes);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
to_ep = uds_fd_table[minor].owner;
/* copy to the target endpoint */
for (i = 0; i < data->nfiledes; i++) {
rc = copy_filp(to_ep, data->filps[i]);
if (rc < 0) {
/* revert set_filp() calls */
for (j = 0; j < data->nfiledes; j++) {
put_filp(data->filps[j]);
}
/* revert copy_filp() calls */
for (j = i; j >= 0; j--) {
cancel_fd(to_ep, data->fds[j]);
}
return rc;
}
data->fds[i] = rc; /* data->fds[i] now has the new FD */
}
for (i = 0; i < data->nfiledes; i++) {
put_filp(data->filps[i]);
#if DEBUG == 1
printf("(uds) recv_fds() => %d\n", data->fds[i]);
#endif
((int *)CMSG_DATA(cmsg))[i] = data->fds[i];
data->fds[i] = -1;
data->filps[i] = NULL;
}
data->nfiledes = 0;
return OK;
}
static int recv_cred(devminor_t minor, struct ancillary *data,
struct msg_control *msg_ctrl)
{
struct msghdr msghdr;
struct cmsghdr *cmsg;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] recv_cred() call_count=%d\n", minor,
++call_count);
#endif
msghdr.msg_control = msg_ctrl->msg_control;
msghdr.msg_controllen = msg_ctrl->msg_controllen;
cmsg = CMSG_FIRSTHDR(&msghdr);
if (cmsg->cmsg_len > 0) {
cmsg = CMSG_NXTHDR(&msghdr, cmsg);
}
cmsg->cmsg_len = CMSG_LEN(sizeof(struct uucred));
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_CREDENTIALS;
memcpy(CMSG_DATA(cmsg), &(data->cred), sizeof(struct uucred));
return OK;
}
static int do_sendmsg(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int peer, rc, i;
struct msg_control msg_ctrl;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendmsg() call_count=%d\n", minor, ++call_count);
#endif
memset(&msg_ctrl, '\0', sizeof(struct msg_control));
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control));
if (rc != OK) {
return EIO;
}
/* locate peer */
peer = -1;
if (uds_fd_table[minor].type == SOCK_DGRAM) {
if (uds_fd_table[minor].target.sun_path[0] == '\0' ||
uds_fd_table[minor].target.sun_family != AF_UNIX) {
return EDESTADDRREQ;
}
for (i = 0; i < NR_FDS; i++) {
/* look for a SOCK_DGRAM socket that is bound on
* the target address
*/
if (uds_fd_table[i].type == SOCK_DGRAM &&
uds_fd_table[i].addr.sun_family == AF_UNIX &&
!strncmp(uds_fd_table[minor].target.sun_path,
uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)){
peer = i;
break;
}
}
if (peer == -1) {
return ENOENT;
}
} else {
peer = uds_fd_table[minor].peer;
if (peer == -1) {
return ENOTCONN;
}
}
#if DEBUG == 1
printf("(uds) [%d] sendmsg() -- peer=%d\n", minor, peer);
#endif
/* note: it's possible that there is already some file
* descriptors in ancillary_data if the peer didn't call
* recvmsg() yet. That's okay. The receiver will
* get the current file descriptors plus the new ones.
*/
rc = msg_control_read(&msg_ctrl, &uds_fd_table[peer].ancillary_data,
minor);
if (rc != OK) {
return rc;
}
return send_fds(minor, &uds_fd_table[peer].ancillary_data);
}
static int do_recvmsg(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant)
{
int rc;
struct msg_control msg_ctrl;
socklen_t controllen_avail = 0;
socklen_t controllen_needed = 0;
socklen_t controllen_desired = 0;
#if DEBUG == 1
static int call_count = 0;
printf("(uds) [%d] do_sendmsg() call_count=%d\n", minor, ++call_count);
printf("(uds) [%d] CREDENTIALS {pid:%d,uid:%d,gid:%d}\n", minor,
uds_fd_table[minor].ancillary_data.cred.pid,
uds_fd_table[minor].ancillary_data.cred.uid,
uds_fd_table[minor].ancillary_data.cred.gid);
#endif
memset(&msg_ctrl, '\0', sizeof(struct msg_control));
/* get the msg_control from the user, it will include the
* amount of space the user has allocated for control data.
*/
rc = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control));
if (rc != OK) {
return EIO;
}
controllen_avail = MIN(msg_ctrl.msg_controllen, MSG_CONTROL_MAX);
if (uds_fd_table[minor].ancillary_data.nfiledes > 0) {
controllen_needed = CMSG_LEN(sizeof(int) *
(uds_fd_table[minor].ancillary_data.nfiledes));
}
/* if there is room we also include credentials */
controllen_desired = controllen_needed +
CMSG_LEN(sizeof(struct uucred));
if (controllen_needed > controllen_avail) {
return EOVERFLOW;
}
rc = recv_fds(minor, &uds_fd_table[minor].ancillary_data, &msg_ctrl);
if (rc != OK) {
return rc;
}
if (controllen_desired <= controllen_avail) {
rc = recv_cred(minor, &uds_fd_table[minor].ancillary_data,
&msg_ctrl);
if (rc != OK) {
return rc;
}
}
/* send the user the control data */
rc = sys_safecopyto(endpt, grant, 0, (vir_bytes) &msg_ctrl,
sizeof(struct msg_control));
return rc ? EIO : OK;
}
int uds_do_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt,
cp_grant_id_t grant)
{
int rc;
switch (request) { /* Handle the ioctl(2) command */
case NWIOSUDSCONN:
/* connect to a listening socket -- connect() */
rc = do_connect(minor, endpt, grant);
break;
case NWIOSUDSACCEPT:
/* accept an incoming connection -- accept() */
rc = do_accept(minor, endpt, grant);
break;
case NWIOSUDSBLOG:
/* set the backlog_size and put the socket into the listening
* state -- listen()
*/
rc = do_listen(minor, endpt, grant);
break;
case NWIOSUDSTYPE:
/* set the type for this socket (i.e. SOCK_STREAM, SOCK_DGRAM,
* etc) -- socket()
*/
rc = do_socket(minor, endpt, grant);
break;
case NWIOSUDSADDR:
/* set the address for this socket -- bind() */
rc = do_bind(minor, endpt, grant);
break;
case NWIOGUDSADDR:
/* get the address for this socket -- getsockname() */
rc = do_getsockname(minor, endpt, grant);
break;
case NWIOGUDSPADDR:
/* get the address for the peer -- getpeername() */
rc = do_getpeername(minor, endpt, grant);
break;
case NWIOSUDSSHUT:
/* shutdown a socket for reading, writing, or both --
* shutdown()
*/
rc = do_shutdown(minor, endpt, grant);
break;
case NWIOSUDSPAIR:
/* connect two sockets -- socketpair() */
rc = do_socketpair(minor, endpt, grant);
break;
case NWIOGUDSSOTYPE:
/* get socket type -- getsockopt(SO_TYPE) */
rc = do_getsockopt_sotype(minor, endpt, grant);
break;
case NWIOGUDSPEERCRED:
/* get peer endpoint -- getsockopt(SO_PEERCRED) */
rc = do_getsockopt_peercred(minor, endpt, grant);
break;
case NWIOSUDSTADDR:
/* set target address -- sendto() */
rc = do_sendto(minor, endpt, grant);
break;
case NWIOGUDSFADDR:
/* get from address -- recvfrom() */
rc = do_recvfrom(minor, endpt, grant);
break;
case NWIOGUDSSNDBUF:
/* get the send buffer size -- getsockopt(SO_SNDBUF) */
rc = do_getsockopt_sndbuf(minor, endpt, grant);
break;
case NWIOSUDSSNDBUF:
/* set the send buffer size -- setsockopt(SO_SNDBUF) */
rc = do_setsockopt_sndbuf(minor, endpt, grant);
break;
case NWIOGUDSRCVBUF:
/* get the send buffer size -- getsockopt(SO_SNDBUF) */
rc = do_getsockopt_rcvbuf(minor, endpt, grant);
break;
case NWIOSUDSRCVBUF:
/* set the send buffer size -- setsockopt(SO_SNDBUF) */
rc = do_setsockopt_rcvbuf(minor, endpt, grant);
break;
case NWIOSUDSCTRL:
/* set the control data -- sendmsg() */
rc = do_sendmsg(minor, endpt, grant);
break;
case NWIOGUDSCTRL:
/* set the control data -- recvmsg() */
rc = do_recvmsg(minor, endpt, grant);
break;
default:
/* the IOCTL command is not valid for /dev/uds -- this happens
* a lot and is normal. a lot of libc functions determine the
* socket type with IOCTLs. Any not for us simply get an ENOTTY
* response.
*/
rc = ENOTTY;
}
return rc;
}