/* * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL) * This code handles requests generated by operations on /dev/uds * * The entry points into this file are... * * uds_request: process a character device request * * Also See... * * uds.c, uds.h * * Overview * * The interface to unix domain sockets is similar to the * the interface to network sockets. There is a character * device (/dev/uds) that uses STYLE_CLONE and this server * is a 'driver' for that device. */ #define DEBUG 0 #include "inc.h" #include "const.h" #include "glo.h" #include "uds.h" static ssize_t uds_perform_read(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant, size_t size, int pretend); static ssize_t uds_perform_write(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant, size_t size, int pretend); static int uds_open(devminor_t orig_minor, int access, endpoint_t user_endpt); static int uds_close(devminor_t minor); static ssize_t uds_read(devminor_t minor, u64_t position, endpoint_t endpt, cp_grant_id_t grant, size_t size, int flags, cdev_id_t id); static ssize_t uds_write(devminor_t minor, u64_t position, endpoint_t endpt, cp_grant_id_t grant, size_t size, int flags, cdev_id_t id); static int uds_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt, cp_grant_id_t grant, int flags, endpoint_t user_endpt, cdev_id_t id); static int uds_cancel(devminor_t minor, endpoint_t endpt, cdev_id_t id); static int uds_select(devminor_t minor, unsigned int ops, endpoint_t endpt); static struct chardriver uds_tab = { .cdr_open = uds_open, .cdr_close = uds_close, .cdr_read = uds_read, .cdr_write = uds_write, .cdr_ioctl = uds_ioctl, .cdr_cancel = uds_cancel, .cdr_select = uds_select }; void uds_request(message *m_ptr, int ipc_status) { /* Use libchardriver to process character device requests. */ chardriver_process(&uds_tab, m_ptr, ipc_status); } static int uds_open(devminor_t UNUSED(orig_minor), int access, endpoint_t user_endpt) { message fs_m_in, fs_m_out; struct uucred ucred; devminor_t minor; int rc, i; #if DEBUG == 1 static int call_count = 0; printf("(uds) [-] uds_open() call_count=%d\n", ++call_count); printf("Endpoint: 0x%x\n", user_endpt); #endif /* * Find a slot in the descriptor table for the new descriptor. * The index of the descriptor in the table will be returned. * Subsequent calls to read/write/close/ioctl/etc will use this * minor number. The minor number must be different from the * the /dev/uds device's minor number (currently 0). */ minor = -1; /* to trap error */ for (i = 1; i < NR_FDS; i++) { if (uds_fd_table[i].state == UDS_FREE) { minor = i; break; } } if (minor == -1) return ENFILE; /* * We found a slot in uds_fd_table, now initialize the descriptor */ /* mark this one as 'in use' so that it doesn't get assigned to * another socket */ uds_fd_table[minor].state = UDS_INUSE; /* set the socket owner */ uds_fd_table[minor].owner = user_endpt; /* setup select(2) framework */ uds_fd_table[minor].sel_endpt = NONE; uds_fd_table[minor].sel_ops = 0; /* initialize the data pointer (pos) to the start of the PIPE */ uds_fd_table[minor].pos = 0; /* the PIPE is initially empty */ uds_fd_table[minor].size = 0; /* the default for a new socket is to allow reading and writing. * shutdown(2) will remove one or both flags. */ uds_fd_table[minor].mode = S_IRUSR|S_IWUSR; /* In libc socket(2) sets this to the actual value later with the * NWIOSUDSTYPE ioctl(). */ uds_fd_table[minor].type = -1; /* Clear the backlog by setting each entry to -1 */ for (i = 0; i < UDS_SOMAXCONN; i++) { /* initially no connections are pending */ uds_fd_table[minor].backlog[i] = -1; } memset(&uds_fd_table[minor].ancillary_data, '\0', sizeof(struct ancillary)); for (i = 0; i < OPEN_MAX; i++) { uds_fd_table[minor].ancillary_data.fds[i] = -1; } /* default the size to UDS_SOMAXCONN */ uds_fd_table[minor].backlog_size = UDS_SOMAXCONN; /* the socket isn't listening for incoming connections until * listen(2) is called */ uds_fd_table[minor].listening = 0; /* initially the socket is not connected to a peer */ uds_fd_table[minor].peer = -1; /* there isn't a child waiting to be accept(2)'d */ uds_fd_table[minor].child = -1; /* initially the socket is not bound or listening on an address */ memset(&(uds_fd_table[minor].addr), '\0', sizeof(struct sockaddr_un)); memset(&(uds_fd_table[minor].source), '\0', sizeof(struct sockaddr_un)); memset(&(uds_fd_table[minor].target), '\0', sizeof(struct sockaddr_un)); /* Initially the socket isn't suspended. */ uds_fd_table[minor].suspended = UDS_NOT_SUSPENDED; /* get the effective user id and effective group id from the endpoint */ /* this is needed in the REQ_NEWNODE request to PFS. */ rc = getnucred(user_endpt, &ucred); if (rc == -1) { /* roll back the changes we made to the descriptor */ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); /* likely error: invalid endpoint / proc doesn't exist */ return EIO; } /* Prepare Request to the FS side of PFS */ fs_m_in.m_type = REQ_NEWNODE; fs_m_in.REQ_MODE = I_NAMED_PIPE; fs_m_in.REQ_DEV = NO_DEV; fs_m_in.REQ_UID = ucred.cr_uid; fs_m_in.REQ_GID = ucred.cr_gid; /* Request a new inode on the pipe file system */ rc = fs_newnode(&fs_m_in, &fs_m_out); if (rc != OK) { /* roll back the changes we made to the descriptor */ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); /* likely error: get_block() failed */ return rc; } /* Process the response */ uds_fd_table[minor].inode_nr = fs_m_out.RES_INODE_NR; return minor; /* cloned! */ } static int uds_close(devminor_t minor) { message fs_m_in, fs_m_out; int rc; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_close() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return ENXIO; if (uds_fd_table[minor].state != UDS_INUSE) { /* attempted to close a socket that hasn't been opened -- * something is very wrong :( */ return EINVAL; } /* if the socket is connected, disconnect it */ if (uds_fd_table[minor].peer != -1) { int peer = uds_fd_table[minor].peer; /* set peer of this peer to -1 */ uds_fd_table[peer].peer = -1; /* error to pass to peer */ uds_fd_table[peer].err = ECONNRESET; /* if peer was blocked on I/O revive peer */ if (uds_fd_table[peer].suspended != UDS_NOT_SUSPENDED) uds_unsuspend(peer); } if (uds_fd_table[minor].ancillary_data.nfiledes > 0) { uds_clear_fds(minor, &uds_fd_table[minor].ancillary_data); } /* Prepare Request to the FS side of PFS */ fs_m_in.m_type = REQ_PUTNODE; fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; fs_m_in.REQ_COUNT = 1; /* set the socket back to its original UDS_FREE state */ memset(&(uds_fd_table[minor]), '\0', sizeof(uds_fd_t)); /* Request the removal of the inode from the pipe file system */ rc = fs_putnode(&fs_m_in, &fs_m_out); if (rc != OK) { printf("PFS: fs_putnode returned %d\n", rc); return rc; } return OK; } static int uds_select(devminor_t minor, unsigned int ops, endpoint_t endpt) { unsigned int ready_ops; int i, bytes, watch; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_select() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return ENXIO; if (uds_fd_table[minor].state != UDS_INUSE) { /* attempted to select on a socket that hasn't been opened -- * something is very wrong :( */ return EINVAL; } watch = (ops & SEL_NOTIFY); ops &= (SEL_RD | SEL_WR | SEL_ERR); ready_ops = 0; /* check if there is data available to read */ if (ops & SEL_RD) { bytes = uds_perform_read(minor, NONE, GRANT_INVALID, 1, 1); if (bytes > 0) { /* there is data in the pipe for us to read */ ready_ops |= SEL_RD; } else if (uds_fd_table[minor].listening == 1) { /* check for pending connections */ for (i = 0; i < uds_fd_table[minor].backlog_size; i++) { if (uds_fd_table[minor].backlog[i] != -1) { ready_ops |= SEL_RD; break; } } } else if (bytes != SUSPEND) { ready_ops |= SEL_RD; } } /* check if we can write without blocking */ if (ops & SEL_WR) { bytes = uds_perform_write(minor, NONE, GRANT_INVALID, PIPE_BUF, 1); if (bytes != 0 && bytes != SUSPEND) { /* There is room to write or there is an error * condition. */ ready_ops |= SEL_WR; } } /* If not all requested ops were ready, and the caller requests to be * notified about changes, we add the remaining ops to the saved set. */ ops &= ~ready_ops; if (ops && watch) { uds_fd_table[minor].sel_endpt = endpt; uds_fd_table[minor].sel_ops |= ops; } return ready_ops; } static ssize_t uds_perform_read(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant, size_t size, int pretend) { int rc, peer; message fs_m_in; message fs_m_out; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_perform_read() call_count=%d\n", minor, ++call_count); #endif peer = uds_fd_table[minor].peer; /* skip reads and writes of 0 (or less!) bytes */ if (size <= 0) { return 0; } /* check if we are allowed to read */ if (!(uds_fd_table[minor].mode & S_IRUSR)) { /* socket is shutdown for reading */ return EPIPE; } if (uds_fd_table[minor].size == 0) { if (peer == -1) { /* We're not connected. That's only a problem when this * socket is connection oriented. */ if (uds_fd_table[minor].type == SOCK_STREAM || uds_fd_table[minor].type == SOCK_SEQPACKET) { if (uds_fd_table[minor].err == ECONNRESET) { uds_fd_table[minor].err = 0; return ECONNRESET; } else { return ENOTCONN; } } } /* Check if process is reading from a closed pipe */ if (peer != -1 && !(uds_fd_table[peer].mode & S_IWUSR) && uds_fd_table[minor].size == 0) { return 0; } if (pretend) { return SUSPEND; } /* maybe a process is blocked waiting to write? if * needed revive the writer */ if (peer != -1 && uds_fd_table[peer].suspended == UDS_SUSPENDED_WRITE) uds_unsuspend(peer); #if DEBUG == 1 printf("(uds) [%d] suspending read request\n", minor); #endif /* Process is reading from an empty pipe, * suspend it so some bytes can be written */ return EDONTREPLY; } if (pretend) { return (size > uds_fd_table[minor].size) ? uds_fd_table[minor].size : size; } /* Prepare Request to the FS side of PFS */ fs_m_in.m_source = endpt; fs_m_in.m_type = REQ_READ; fs_m_in.REQ_INODE_NR = uds_fd_table[minor].inode_nr; fs_m_in.REQ_GRANT = grant; fs_m_in.REQ_SEEK_POS_HI = 0; fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[minor].pos; fs_m_in.REQ_NBYTES = (size > uds_fd_table[minor].size) ? uds_fd_table[minor].size : size; /* perform the read */ rc = fs_readwrite(&fs_m_in, &fs_m_out); if (rc != OK) { printf("PFS: fs_readwrite returned %d\n", rc); return rc; } /* Process the response */ #if DEBUG == 1 printf("(uds) [%d] read complete\n", minor); #endif /* move the position of the data pointer up to data we haven't * read yet */ uds_fd_table[minor].pos += fs_m_out.RES_NBYTES; /* decrease the number of unread bytes */ uds_fd_table[minor].size -= fs_m_out.RES_NBYTES; /* if we have 0 unread bytes, move the data pointer back to the * start of the buffer */ if (uds_fd_table[minor].size == 0) { uds_fd_table[minor].pos = 0; } /* maybe a big write was waiting for us to read some data, if * needed revive the writer */ if (peer != -1 && uds_fd_table[peer].suspended == UDS_SUSPENDED_WRITE) uds_unsuspend(peer); /* see if peer is blocked on select() and a write is possible (from * peer to minor); if the peer wants to know about write being possible * and it doesn't know about it already, then let the peer know. */ if (peer != -1 && (uds_fd_table[peer].sel_ops & SEL_WR) && (uds_fd_table[minor].size+uds_fd_table[minor].pos + 1 < PIPE_BUF)){ /* a write on peer is possible now */ chardriver_reply_select(uds_fd_table[peer].sel_endpt, peer, SEL_WR); uds_fd_table[peer].sel_ops &= ~SEL_WR; } return fs_m_out.RES_NBYTES; /* return number of bytes read */ } static ssize_t uds_perform_write(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant, size_t size, int pretend) { int rc, peer, i; message fs_m_in; message fs_m_out; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_perform_write() call_count=%d\n", minor, ++call_count); #endif /* skip reads and writes of 0 (or less!) bytes */ if (size <= 0) { return 0; } /* check if we are allowed to write */ if (!(uds_fd_table[minor].mode & S_IWUSR)) { /* socket is shutdown for writing */ return EPIPE; } if (size > PIPE_BUF) { /* message is too big to ever write to the PIPE */ return EMSGSIZE; } if (uds_fd_table[minor].type == SOCK_STREAM || uds_fd_table[minor].type == SOCK_SEQPACKET) { /* if we're writing with a connection oriented socket, * then it needs a peer to write to */ if (uds_fd_table[minor].peer == -1) { if (uds_fd_table[minor].err == ECONNRESET) { uds_fd_table[minor].err = 0; return ECONNRESET; } else { return ENOTCONN; } } else { peer = uds_fd_table[minor].peer; } } else /* uds_fd_table[minor].type == SOCK_DGRAM */ { peer = -1; /* locate the "peer" we want to write to */ for (i = 0; i < NR_FDS; i++) { /* look for a SOCK_DGRAM socket that is bound on * the target address */ if (uds_fd_table[i].type == SOCK_DGRAM && uds_fd_table[i].addr.sun_family == AF_UNIX && !strncmp(uds_fd_table[minor].target.sun_path, uds_fd_table[i].addr.sun_path, UNIX_PATH_MAX)) { peer = i; break; } } } if (peer == -1) { if (pretend) return SUSPEND; return ENOENT; } /* check if we write to a closed pipe */ if (!(uds_fd_table[peer].mode & S_IRUSR)) { return EPIPE; } /* we have to preserve the boundary for DGRAM. if there's * already a packet waiting, discard it silently and pretend * it was written. */ if(uds_fd_table[minor].type == SOCK_DGRAM && uds_fd_table[peer].size > 0) { return size; } /* check if write would overrun buffer. check if message * SEQPACKET wouldn't write to an empty buffer. check if * connectionless sockets have a target to write to. */ if ((uds_fd_table[peer].pos+uds_fd_table[peer].size+size > PIPE_BUF) || ((uds_fd_table[minor].type == SOCK_SEQPACKET) && uds_fd_table[peer].size > 0)) { if (pretend) { return SUSPEND; } /* if needed revive the reader */ if (uds_fd_table[peer].suspended == UDS_SUSPENDED_READ) uds_unsuspend(peer); #if DEBUG == 1 printf("(uds) [%d] suspending write request\n", minor); #endif /* Process is reading from an empty pipe, * suspend it so some bytes can be written */ return EDONTREPLY; } if (pretend) { return size; } /* Prepare Request to the FS side of PFS */ fs_m_in.m_source = endpt; fs_m_in.m_type = REQ_WRITE; fs_m_in.REQ_INODE_NR = uds_fd_table[peer].inode_nr; fs_m_in.REQ_GRANT = grant; fs_m_in.REQ_SEEK_POS_HI = 0; fs_m_in.REQ_SEEK_POS_LO = uds_fd_table[peer].pos + uds_fd_table[peer].size; fs_m_in.REQ_NBYTES = size; /* Request the write */ rc = fs_readwrite(&fs_m_in, &fs_m_out); if (rc != OK) { printf("PFS: fs_readwrite returned %d\n", rc); return rc; } /* Process the response */ #if DEBUG == 1 printf("(uds) [%d] write complete\n", minor); #endif /* increase the count of unread bytes */ uds_fd_table[peer].size += fs_m_out.RES_NBYTES; /* fill in the source address to be returned by recvfrom & recvmsg */ if (uds_fd_table[minor].type == SOCK_DGRAM) { memcpy(&uds_fd_table[peer].source, &uds_fd_table[minor].addr, sizeof(struct sockaddr_un)); } /* revive peer that was waiting for us to write */ if (uds_fd_table[peer].suspended == UDS_SUSPENDED_READ) uds_unsuspend(peer); /* see if peer is blocked on select(); if the peer wants to know about * data ready to read and it doesn't know about it already, then let * the peer know we have data for it. */ if ((uds_fd_table[peer].sel_ops & SEL_RD) && fs_m_out.RES_NBYTES > 0) { /* a read on peer is possible now */ chardriver_reply_select(uds_fd_table[peer].sel_endpt, peer, SEL_RD); uds_fd_table[peer].sel_ops &= ~SEL_RD; } return fs_m_out.RES_NBYTES; /* return number of bytes written */ } static ssize_t uds_read(devminor_t minor, u64_t position, endpoint_t endpt, cp_grant_id_t grant, size_t size, int flags, cdev_id_t id) { ssize_t rc; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_read() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return ENXIO; if (uds_fd_table[minor].state != UDS_INUSE) { /* attempted to read from a socket that hasn't been opened -- * something is very wrong :( */ return EINVAL; } rc = uds_perform_read(minor, endpt, grant, size, 0); /* If the call couldn't complete, suspend the caller. */ if (rc == EDONTREPLY) { uds_fd_table[minor].suspended = UDS_SUSPENDED_READ; uds_fd_table[minor].susp_endpt = endpt; uds_fd_table[minor].susp_grant = grant; uds_fd_table[minor].susp_size = size; uds_fd_table[minor].susp_id = id; /* If the call wasn't supposed to block, cancel immediately. */ if (flags & FLG_OP_NONBLOCK) { uds_cancel(minor, endpt, id); rc = EAGAIN; } } return rc; } static ssize_t uds_write(devminor_t minor, u64_t position, endpoint_t endpt, cp_grant_id_t grant, size_t size, int flags, cdev_id_t id) { ssize_t rc; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_write() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return ENXIO; if (uds_fd_table[minor].state != UDS_INUSE) { /* attempted to write to a socket that hasn't been opened -- * something is very wrong :( */ return EINVAL; } rc = uds_perform_write(minor, endpt, grant, size, 0); /* If the call couldn't complete, suspend the caller. */ if (rc == EDONTREPLY) { uds_fd_table[minor].suspended = UDS_SUSPENDED_WRITE; uds_fd_table[minor].susp_endpt = endpt; uds_fd_table[minor].susp_grant = grant; uds_fd_table[minor].susp_size = size; uds_fd_table[minor].susp_id = id; /* If the call wasn't supposed to block, cancel immediately. */ if (flags & FLG_OP_NONBLOCK) { uds_cancel(minor, endpt, id); rc = EAGAIN; } } return rc; } static int uds_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt, cp_grant_id_t grant, int flags, endpoint_t user_endpt, cdev_id_t id) { int rc; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_ioctl() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return ENXIO; if (uds_fd_table[minor].state != UDS_INUSE) { /* attempted to perform I/O control on a socket that hasn't * been opened -- something is very wrong :( */ return EINVAL; } /* update the owner endpoint */ uds_fd_table[minor].owner = user_endpt; /* let the UDS subsystem handle the actual request */ rc = uds_do_ioctl(minor, request, endpt, grant); /* If the call couldn't complete, suspend the caller. */ if (rc == EDONTREPLY) { /* The suspension type is already set by the IOCTL handler. */ if (uds_fd_table[minor].suspended == UDS_NOT_SUSPENDED) panic("IOCTL did not actually suspend?"); uds_fd_table[minor].susp_endpt = endpt; uds_fd_table[minor].susp_grant = grant; uds_fd_table[minor].susp_size = 0; /* irrelevant */ uds_fd_table[minor].susp_id = id; /* If the call wasn't supposed to block, cancel immediately. */ if (flags & FLG_OP_NONBLOCK) { uds_cancel(minor, endpt, id); rc = EAGAIN; } } return rc; } void uds_unsuspend(devminor_t minor) { int r; uds_fd_t *fdp; fdp = &uds_fd_table[minor]; switch (fdp->suspended) { case UDS_SUSPENDED_READ: r = uds_perform_read(minor, fdp->susp_endpt, fdp->susp_grant, fdp->susp_size, 0); if (r == EDONTREPLY) return; break; case UDS_SUSPENDED_WRITE: r = uds_perform_write(minor, fdp->susp_endpt, fdp->susp_grant, fdp->susp_size, 0); if (r == EDONTREPLY) return; break; case UDS_SUSPENDED_CONNECT: case UDS_SUSPENDED_ACCEPT: /* In both cases, the caller already set up the connection. * The only thing to do here is unblock. */ r = OK; break; default: panic("unknown suspension type %d", fdp->suspended); } chardriver_reply_task(fdp->susp_endpt, fdp->susp_id, r); fdp->suspended = UDS_NOT_SUSPENDED; } static int uds_cancel(devminor_t minor, endpoint_t endpt, cdev_id_t id) { uds_fd_t *fdp; int i, j; #if DEBUG == 1 static int call_count = 0; printf("(uds) [%d] uds_cancel() call_count=%d\n", minor, ++call_count); #endif if (minor < 0 || minor >= NR_FDS) return EDONTREPLY; fdp = &uds_fd_table[minor]; if (fdp->state != UDS_INUSE) { printf("PFS: cancel request for closed minor %d\n", minor); return EDONTREPLY; } /* Make sure the cancel request is for a request we're hanging on. */ if (fdp->suspended == UDS_NOT_SUSPENDED || fdp->susp_endpt != endpt || fdp->susp_id != id) { return EDONTREPLY; /* this happens. */ } /* The system call was cancelled, so the socket is not suspended * anymore. */ switch (fdp->suspended) { case UDS_SUSPENDED_ACCEPT: /* accept() */ /* partial accept() only changes * uds_fd_table[minorparent].child */ for (i = 0; i < NR_FDS; i++) { if (uds_fd_table[i].child == minor) { uds_fd_table[i].child = -1; } } break; case UDS_SUSPENDED_CONNECT: /* connect() */ /* partial connect() sets addr and adds minor to server backlog */ for (i = 0; i < NR_FDS; i++) { /* find a socket that is in use. */ if (uds_fd_table[i].state != UDS_INUSE) continue; /* see if minor is in the backlog */ for (j = 0; j < uds_fd_table[i].backlog_size; j++) { if (uds_fd_table[i].backlog[j] == minor) { /* remove from backlog */ uds_fd_table[i].backlog[j] = -1; } } } /* clear the address */ memset(&(uds_fd_table[minor].addr), '\0', sizeof(struct sockaddr_un)); break; case UDS_SUSPENDED_READ: case UDS_SUSPENDED_WRITE: /* Nothing more to do. */ break; default: panic("unknown suspension type %d", fdp->suspended); } fdp->suspended = UDS_NOT_SUSPENDED; return EINTR; /* reply to the original request */ }