minix/servers/lwip/socket.c
David van Moolenbroek c51cd5fe91 Server/driver protocols: no longer allow third-party copies.
Before safecopies, the IO_ENDPT and DL_ENDPT message fields were needed
to know which actual process to copy data from/to, as that process may
not always be the caller. Now that we have full safecopy support, these
fields have become useless for that purpose: the owner of the grant is
*always* the caller. Allowing the caller to supply another endpoint is
in fact dangerous, because the callee may then end up using a grant
from a third party. One could call this a variant of the confused
deputy problem.

From now on, safecopy calls should always use the caller's endpoint as
grant owner. This fully obsoletes the DL_ENDPT field in the
inet/ethernet protocol. IO_ENDPT has other uses besides identifying the
grant owner though. This patch renames IO_ENDPT to USER_ENDPT, not only
because that is a more fitting name (it should never be used for I/O
after all), but also in order to intentionally break any old system
source code outside the base system. If this patch breaks your code,
fixing it is fairly simple:

- DL_ENDPT should be replaced with m_source;
- IO_ENDPT should be replaced with m_source when used for safecopies;
- IO_ENDPT should be replaced with USER_ENDPT for any other use, e.g.
  when setting REP_ENDPT, matching requests in CANCEL calls, getting
  DEV_SELECT flags, and retrieving of the real user process's endpoint
  in DEV_OPEN.

The changes in this patch are binary backward compatible.
2011-04-11 17:35:05 +00:00

645 lines
13 KiB
C

/*
* This file implements handling of socket-related requests from VFS
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <minix/ipc.h>
#include <minix/com.h>
#include <minix/callnr.h>
#include <minix/sysutil.h>
#include <lwip/tcp.h>
#include <net/ioctl.h>
#include "inet_config.h"
#include "proto.h"
#include "socket.h"
/*
 * Debug tracing used throughout this file. Change the "#if 0" to "#if 1" to
 * print every trace with printf(), prefixed by function name and line number;
 * otherwise the arguments are handed to debug_print() unchanged.
 */
#if 0
#define debug_sock_print(str, ...) printf("LWIP %s:%d : " str "\n", \
__func__, __LINE__, ##__VA_ARGS__)
#else
#define debug_sock_print(...) debug_print(__VA_ARGS__)
#endif
/*
 * Table of all sockets. get_nic_sock() hands out the slots starting at index
 * SOCK_TYPES, get_unused_sock() allocates from index SOCK_TYPES + MAX_DEVS
 * upwards. A slot whose ops pointer is NULL is treated as free.
 */
struct socket socket[MAX_SOCKETS];
/*
 * Set to 1 by sock_revive() and sock_select_notify() once a notify() has been
 * sent, and cleared again by do_status(). While it is set, no further
 * notifications are sent.
 */
static int notified;
/* Allocation helpers for the per-socket receive queue nodes */
#define recv_q_alloc() debug_malloc(sizeof(struct recv_q))
#define recv_q_free debug_free
/*
 * One entry of the doubly linked queue of requests that could not be served
 * immediately because their socket was busy. Each entry holds a private copy
 * of the request message.
 */
struct mq {
message m;
struct mq * prev;
struct mq * next;
};
/* Allocation helpers for the queue entries */
#define mq_alloc() debug_malloc(sizeof(struct mq))
#define mq_free debug_free
/* First and last entry of the queue; both are NULL when the queue is empty */
static struct mq * mq_head, *mq_tail;
/*
 * Append a copy of the request message to the tail of the queue of suspended
 * requests.
 *
 * Returns 0 on success and -1 if no queue entry could be allocated.
 */
static int mq_enqueue(message * m)
{
	struct mq * entry;

	debug_sock_print("sock %d op %d", m->DEVICE, m->m_type);

	entry = mq_alloc();
	if (entry == NULL)
		return -1;

	entry->next = NULL;
	entry->m = *m;

	if (mq_head == NULL) {
		/* the queue was empty, the new entry is both head and tail */
		entry->prev = NULL;
		mq_head = entry;
	} else {
		/* link behind the current tail */
		entry->prev = mq_tail;
		mq_tail->next = entry;
	}
	mq_tail = entry;

	return 0;
}
/*
 * Unlink the oldest entry from the queue of suspended requests and return it.
 * The entry is not freed here. Returns NULL when the queue is empty.
 */
__unused static struct mq * mq_dequeue_head(void)
{
	struct mq * first = mq_head;

	if (first == NULL)
		return NULL;

	if (first == mq_tail) {
		/* that was the only entry */
		mq_head = mq_tail = NULL;
	} else {
		mq_head = first->next;
		mq_head->prev = NULL;
	}

	debug_sock_print("socket %d\n", first->m.DEVICE);

	return first;
}
/*
 * Unlink the given entry from the queue of suspended requests. The entry
 * itself is neither modified nor freed.
 *
 * Note that when head and tail are equal the queue is simply reset to empty
 * without looking at mq at all, so the caller must pass an entry that really
 * is on the queue.
 */
static void mq_dequeue(struct mq * mq)
{
	struct mq * before, * after;

	if (mq_head == mq_tail) {
		mq_head = mq_tail = NULL;
		return;
	}

	before = mq->prev;
	after = mq->next;

	/* fix up the forward link (or the head when mq was the first entry) */
	if (before == NULL) {
		mq_head = after;
		mq_head->prev = NULL;
	} else {
		before->next = after;
	}

	/* fix up the backward link (or the tail when mq was the last entry) */
	if (after == NULL) {
		mq_tail = before;
		mq_tail->next = NULL;
	} else {
		after->prev = before;
	}
}
/*
 * Remove a suspended request from the queue because the caller cancelled it.
 *
 * The queue is searched from the tail (most recently queued) towards the head
 * for an entry whose DEVICE, USER_ENDPT and IO_GRANT fields all match those of
 * the cancel message m. The first match is unlinked and freed.
 *
 * Returns 1 if a matching request was found and removed, 0 if the queue holds
 * no such request. In the latter case the queue is left untouched, so that the
 * caller can go on to check whether the operation to cancel is the one
 * currently pending on the socket.
 */
static int mq_cancel(message * m)
{
	struct mq * mq;

	for (mq = mq_tail; mq; mq = mq->prev) {
		if (m->DEVICE == mq->m.DEVICE &&
				m->USER_ENDPT == mq->m.USER_ENDPT &&
				m->IO_GRANT == mq->m.IO_GRANT) {
			debug_sock_print("socket %d\n", mq->m.DEVICE);
			/*
			 * Only unlink an entry that was really found; passing
			 * NULL to mq_dequeue() would either dereference it or
			 * wipe an unrelated single-entry queue.
			 */
			mq_dequeue(mq);
			mq_free(mq);
			return 1;
		}
	}

	return 0;
}
/*
 * Append a data item to the tail of the socket's receive queue and account
 * for its size in recv_data_size. Only the pointer is stored, the data is not
 * copied. size must be greater than zero.
 *
 * Returns OK on success, ENOMEM if no queue node could be allocated.
 */
int sock_enqueue_data(struct socket * sock, void * data, unsigned size)
{
	struct recv_q * node = recv_q_alloc();

	if (node == NULL)
		return ENOMEM;

	node->data = data;
	node->next = NULL;

	if (sock->recv_head == NULL)
		sock->recv_head = node;
	else
		sock->recv_tail->next = node;
	sock->recv_tail = node;

	assert(size > 0);
	sock->recv_data_size += size;

	return OK;
}
/*
 * Remove the oldest item from the socket's receive queue and return its data
 * pointer; the queue node itself is freed. Returns NULL when the queue is
 * empty. recv_data_size is not adjusted here.
 */
void * sock_dequeue_data(struct socket * sock)
{
	struct recv_q * first = sock->recv_head;
	void * payload;

	if (first == NULL)
		return NULL;

	payload = first->data;

	sock->recv_head = first->next;
	if (sock->recv_head == NULL)
		sock->recv_tail = NULL;

	recv_q_free(first);

	return payload;
}
/*
 * Drain the socket's receive queue, releasing every dequeued data item with
 * data_free(), and reset the accounted data size to zero.
 *
 * Draining stops at the first NULL returned by sock_dequeue_data(), which is
 * returned both for an empty queue and for an item whose data pointer is NULL.
 */
void sock_dequeue_data_all(struct socket * sock,
				recv_data_free_fn data_free)
{
	void * item;

	for (item = sock_dequeue_data(sock); item != NULL;
			item = sock_dequeue_data(sock))
		data_free(item);

	sock->recv_data_size = 0;
}
/*
 * Turn a request message into a reply in place: the reply carries the
 * endpoint and the grant of the request plus the given status.
 */
static void set_reply_msg(message * m, int status)
{
	/*
	 * Fetch both request fields before storing any reply field.
	 * NOTE(review): presumably request and reply fields may share storage
	 * within the message - confirm against the message layout.
	 */
	int endpt = m->USER_ENDPT;
	int grant = (int)m->IO_GRANT;

	m->REP_ENDPT = endpt;
	m->REP_STATUS = status;
	m->REP_IO_GRANT = grant;
}
/*
 * Convert the request m into a TASK_REPLY carrying the given status and send
 * it back to the sender of the request. The message is modified in place.
 * Panics when the reply cannot be sent.
 */
void send_reply(message * m, int status)
{
	int r;

	debug_sock_print("status %d", status);

	set_reply_msg(m, status);
	m->m_type = TASK_REPLY;

	if ((r = send(m->m_source, m)) != OK)
		panic("LWIP : unable to send (err %d)", r);
}
/*
 * Announce that the operation stored in sock->mess has completed with the
 * given status. The stored message is converted into a DEV_REVIVE reply, the
 * requester is notified and the socket is marked as reviving; the reply
 * itself is sent from do_status().
 *
 * If a notification is already outstanding, nothing is done at all.
 * NOTE(review): in that case the status is not recorded and the reviving flag
 * is not set - confirm that the caller retries later.
 */
void sock_revive(struct socket * sock, int status)
{
	int r;

	assert(!(sock->flags & SOCK_FLG_OP_REVIVING));
	assert(sock->flags & (SOCK_FLG_OP_PENDING | SOCK_FLG_OP_SUSPENDED));

	if (notified) {
		debug_sock_print("already notified");
		return;
	}

	assert(sock->mess.m_type != DEV_REVIVE);
	notified = 1;

	debug_sock_print("socket num %ld, status %d",
				get_sock_num(sock), status);

	/* prepare the reply which do_status() will deliver */
	sock->mess.m_type = DEV_REVIVE;
	set_reply_msg(&sock->mess, status);

	r = notify(sock->mess.m_source);
	if (r != OK)
		panic("LWIP : unable to notify (err %d)", r);

	sock->flags |= SOCK_FLG_OP_REVIVING;
}
/*
 * Mark the socket as needing a select check and notify the endpoint that
 * issued the select, unless a notification is already outstanding. The socket
 * must have a select endpoint recorded. Panics when notify() fails.
 */
void sock_select_notify(struct socket * sock)
{
	int r;

	debug_sock_print("socket num %ld", get_sock_num(sock));
	assert(sock->select_ep != NONE);

	/* the flag is set even when no new notification is sent below */
	sock->flags |= SOCK_FLG_SEL_CHECK;

	if (notified) {
		debug_sock_print("already notified");
		return;
	}
	notified = 1;

	r = notify(sock->select_ep);
	if (r != OK)
		panic("LWIP : unable to notify (err %d)", r);
}
/*
 * Deliver the status of the operation stored in sock->mess.
 *
 * - status is SUSPEND: the SUSPEND reply is sent only if the operation was
 *   not suspended before (e.g. by enqueueing the message while the socket was
 *   busy with another pending message). If it was, only the suspended flag is
 *   cleared.
 * - otherwise, if an operation is pending or a previously suspended operation
 *   is being reprocessed: the operation is revived.
 * - otherwise the reply is sent straight away.
 */
void sock_reply(struct socket * sock, int status)
{
	debug_sock_print("socket num %ld status %d type %d",
			get_sock_num(sock), status, sock->mess.m_type);

	if (status != SUSPEND) {
		if (sock->flags &
			(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_SUSPENDED)) {
			sock_revive(sock, status);
			/*
			 * From now on the suspended call is processed as any
			 * other; the status is set and will be collected.
			 */
			sock->flags &= ~SOCK_FLG_OP_SUSPENDED;
		} else {
			send_reply(&sock->mess, status);
		}
		return;
	}

	if (sock->flags & SOCK_FLG_OP_SUSPENDED) {
		debug_sock_print("suspended before");
		sock->flags &= ~SOCK_FLG_OP_SUSPENDED;
		return;
	}

	{
		/* reply from a copy so that the stored request stays intact */
		message copy = sock->mess;

		debug_sock_print("SUSPEND");
		send_reply(&copy, status);
	}
}
struct socket * get_unused_sock(void)
{
int i;
for (i = SOCK_TYPES + MAX_DEVS; i < MAX_SOCKETS; i++) {
if (socket[i].ops == NULL) {
/* clear it all */
memset(&socket[i], 0, sizeof(struct socket));
return &socket[i];
}
}
return NULL;
}
/*
 * Return the socket table slot that belongs to network device dev, i.e. the
 * slot at index dev + SOCK_TYPES, or NULL if dev is not below MAX_DEVS.
 */
struct socket * get_nic_sock(unsigned dev)
{
	return (dev < MAX_DEVS) ? &socket[dev + SOCK_TYPES] : NULL;
}
/*
 * Handle a DEV_OPEN request.
 *
 * The DEVICE field selects what is opened: one of the socket types (TCP, UDP,
 * raw IP) or, above SOCK_TYPES, a network device, in which case the request
 * is passed on to nic_open() with DEVICE rebased to the device index.
 *
 * For a socket type a free socket is allocated, initialized and handed to the
 * type specific open handler. On success the reply status is the number of
 * the new socket, otherwise the error returned by the handler. EINVAL is
 * replied for an unknown type, EAGAIN when no socket is free.
 */
static void socket_open(message * m)
{
	struct sock_ops * ops;
	struct socket * sock;
	int ret = OK;

	switch (m->DEVICE) {
	case SOCK_TYPE_TCP:
		ops = &sock_tcp_ops;
		break;
	case SOCK_TYPE_UDP:
		ops = &sock_udp_ops;
		break;
	case SOCK_TYPE_IP:
		ops = &sock_raw_ip_ops;
		break;
	default:
		ops = NULL;
		break;
	}

	if (ops == NULL) {
		/* not a socket type, maybe a network device */
		if (m->DEVICE - SOCK_TYPES < MAX_DEVS) {
			m->DEVICE -= SOCK_TYPES;
			nic_open(m);
			return;
		}

		printf("LWIP unknown socket type %d\n", m->DEVICE);
		send_reply(m, EINVAL);
		return;
	}

	sock = get_unused_sock();
	if (sock == NULL) {
		printf("LWIP : no free socket\n");
		send_reply(m, EAGAIN);
		return;
	}

	sock->ops = ops;
	sock->select_ep = NONE;
	sock->recv_data_size = 0;

	if (sock->ops && sock->ops->open)
		ret = sock->ops->open(sock, m);

	if (ret != OK) {
		/*
		 * NOTE(review): sock->ops stays set here, so the slot is not
		 * handed out again by get_unused_sock() unless the open
		 * handler released it - confirm.
		 */
		debug_sock_print("failed %d", ret);
		send_reply(m, ret);
		return;
	}

	debug_sock_print("new socket %ld", get_sock_num(sock));
	send_reply(m, get_sock_num(sock));
}
/*
 * Handle a DEV_STATUS request, i.e. the requester asking what we notified it
 * about.
 *
 * The socket table is scanned in index order and at most ONE event is
 * reported per request: either the stored reply of the first socket found in
 * reviving state, or a DEV_IO_READY message for the first socket with a
 * satisfied select. If no socket has anything to report, DEV_NO_STATUS is
 * sent back.
 *
 * The notified flag is cleared up front, so later events trigger a new
 * notification.
 */
static void do_status(message * m)
{
int i;
debug_sock_print("called");
notified = 0;
for (i = 0; i < MAX_SOCKETS; i++) {
struct socket * sock = &socket[i];
/* skip unused slots */
if (!sock->ops) {
continue;
}
if (sock->flags & (SOCK_FLG_OP_REVIVING)) {
/*
* We send the reply and we are done with this request
*/
debug_sock_print("status %d ep %d sent sock %ld type %d",
sock->mess.REP_STATUS,
sock->mess.REP_ENDPT,
get_sock_num(sock),
sock->mess.m_type);
/*
* sock->mess was prepared as the DEV_REVIVE reply by sock_revive().
* NOTE(review): the result of send() is ignored here, unlike in
* send_reply() - confirm this is intended.
*/
send(m->m_source, &sock->mess);
/*
* Remove only the reviving flag, i.e. the status has
* been consumed. SOCK_FLG_OP_PENDING may stay set. For
* instance in case of a TCP write, the application is
* already notified while the process of sending is
* still going on
*/
sock->flags &= ~SOCK_FLG_OP_REVIVING;
return;
}
/*
* We check select AFTER possible reviving an operation,
* otherwise the select will fail as the socket is still
* blocking
*/
if (sock_select_check_set(sock)) {
if (sock->ops && sock->ops->select_reply) {
/*
* Let the socket type fill in which of the selected operations
* are ready. NOTE(review): only these three fields of msg are
* initialized before it is sent.
*/
message msg;
msg.m_type = DEV_IO_READY;
msg.DEV_MINOR = get_sock_num(sock);
msg.DEV_SEL_OPS = 0;
sock->ops->select_reply(sock, &msg);
/* report only if at least one operation is ready */
if (msg.DEV_SEL_OPS) {
int result;
debug_sock_print("socket num %d select "
"result 0x%x sent",
msg.DEV_MINOR,
msg.DEV_SEL_OPS);
result = send(sock->select_ep, &msg);
if (result != OK)
panic("LWIP : unable to send "
"(err %d)", result);
/* the select has been answered, forget about it */
sock_clear_select(sock);
sock->select_ep = NONE;
return;
}
}
}
}
/* nothing to report; the result of this send() is not checked either */
debug_sock_print("no status");
m->m_type = DEV_NO_STATUS;
send(m->m_source, m);
}
/*
 * Dispatch a read, write or ioctl request to the matching handler of the
 * socket's operations table. If the socket, its operations table or the
 * particular handler is missing, EINVAL is replied instead.
 *
 * Must only be called for DEV_READ_S, DEV_WRITE_S and DEV_IOCTL_S requests;
 * any other message type is a panic.
 */
static void socket_request_socket(struct socket * sock, message * m)
{
	int usable = (sock && sock->ops);

	if (m->m_type == DEV_READ_S) {
		if (usable && sock->ops->read)
			sock->ops->read(sock, m);
		else
			send_reply(m, EINVAL);
	} else if (m->m_type == DEV_WRITE_S) {
		if (usable && sock->ops->write)
			sock->ops->write(sock, m);
		else
			send_reply(m, EINVAL);
	} else if (m->m_type == DEV_IOCTL_S) {
		if (usable && sock->ops->ioctl)
			sock->ops->ioctl(sock, m);
		else
			send_reply(m, EINVAL);
	} else {
		panic("LWIP : cannot happen!");
	}
}
/*
 * Entry point for all socket-related requests received from VFS. The request
 * is dispatched on its message type:
 *
 * - DEV_OPEN: handled by socket_open().
 * - DEV_CLOSE: passed to the close handler of the socket.
 * - DEV_READ_S, DEV_WRITE_S, DEV_IOCTL_S: executed immediately if the socket
 *   is idle; otherwise the request is put on the queue of suspended requests
 *   and SUSPEND is replied (ENOMEM if it cannot be queued).
 * - CANCEL: cancels a queued request, a blocked read, or collects the status
 *   of an operation which finished before it was cancelled.
 * - DEV_SELECT: passed to the select handler of the socket.
 * - DEV_STATUS: handled by do_status().
 *
 * EINVAL is replied when the request refers to a socket that does not exist
 * or that lacks the required handler, EGENERIC for an unknown message type.
 */
void socket_request(message * m)
{
	struct socket * sock;

	switch (m->m_type) {
	case DEV_OPEN:
		socket_open(m);
		return;
	case DEV_CLOSE:
		sock = get_sock(m->DEVICE);
		if (sock && sock->ops && sock->ops->close) {
			sock->flags &= ~SOCK_FLG_OP_PENDING;
			sock->mess = *m;
			sock->ops->close(sock, m);
		} else
			send_reply(m, EINVAL);
		return;
	case DEV_READ_S:
	case DEV_WRITE_S:
	case DEV_IOCTL_S:
		sock = get_sock(m->DEVICE);
		if (!sock) {
			send_reply(m, EINVAL);
			return;
		}
		/*
		 * If an operation is pending (blocking operation) or writing is
		 * still going and we want to read, suspend the new operation
		 */
		if ((sock->flags &
			(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) ||
				(m->m_type == DEV_READ_S &&
				 (sock->flags & SOCK_FLG_OP_WRITING))) {
			/* only used for the debug trace below */
			const char * o;

			if (sock->flags & SOCK_FLG_OP_READING)
				o = "READ";
			else if (sock->flags & SOCK_FLG_OP_WRITING)
				o = "WRITE";
			else
				o = "non R/W op";
			debug_sock_print("socket %ld is busy by %s\n",
					get_sock_num(sock), o);
			if (mq_enqueue(m) == 0) {
				send_reply(m, SUSPEND);
			} else {
				debug_sock_print("Enqueuing suspended "
							"call failed");
				send_reply(m, ENOMEM);
			}
			return;
		}
		sock->mess = *m;
		socket_request_socket(sock, m);
		return;
	case CANCEL:
		sock = get_sock(m->DEVICE);
		debug_sock_print("socket num %ld", get_sock_num(sock));
		/* Cancel the last operation in the queue */
		if (mq_cancel(m)) {
			send_reply(m, EINTR);
			return;
		}
		/*
		 * Everything below inspects the socket itself, so it must
		 * exist.
		 */
		if (!sock) {
			send_reply(m, EINVAL);
			return;
		}
		/* ... or a blocked read */
		if ((sock->flags & SOCK_FLG_OP_PENDING) &&
				(sock->flags & SOCK_FLG_OP_READING)) {
			sock->flags &= ~SOCK_FLG_OP_PENDING;
			send_reply(m, EINTR);
			return;
		}
		/*
		 * .. or return the status of the operation which was finished
		 * before canceled
		 */
		if (sock->flags & SOCK_FLG_OP_REVIVING) {
			sock->flags &= ~SOCK_FLG_OP_REVIVING;
			send_reply(m, sock->mess.REP_STATUS);
			return;
		}
		panic("LWIP : no operation to cancel");
		return;
	case DEV_SELECT:
		/*
		 * Select is always executed immediately and is never suspended.
		 * Although, it sets actions which must be monitored
		 */
		sock = get_sock(m->DEVICE);
		/* check the socket before the assertion below looks at it */
		if (!sock) {
			send_reply(m, EINVAL);
			return;
		}
		assert(sock->select_ep == NONE ||
				sock->select_ep == m->m_source);
		if (sock->ops && sock->ops->select) {
			sock->ops->select(sock, m);
			if (sock_select_set(sock))
				sock->select_ep = m->m_source;
		} else
			send_reply(m, EINVAL);
		return;
	case DEV_STATUS:
		do_status(m);
		return;
	default:
		printf("LWIP : unknown message from VFS, type %d\n",
				m->m_type);
		break;
	}

	/* only reached for an unknown message type */
	send_reply(m, EGENERIC);
}
/*
 * Try to resume one suspended request. The queue is walked from the oldest
 * entry; the first request whose socket is neither pending nor reviving (and,
 * for a read, not busy writing) is copied into the socket, executed, and then
 * removed from the queue. At most one request is resumed per call.
 *
 * NOTE(review): the socket returned by get_sock() is used without a NULL
 * check - confirm a queued request cannot outlive its socket.
 */
void mq_process(void)
{
	struct mq * cur, * following;
	struct socket * sock;

	for (cur = mq_head; cur != NULL; cur = following) {
		int busy, read_blocked;

		/* remember the successor, cur may be freed below */
		following = cur->next;

		sock = get_sock(cur->m.DEVICE);

		busy = (sock->flags &
			(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) != 0;
		read_blocked = (cur->m.m_type == DEV_READ_S &&
				(sock->flags & SOCK_FLG_OP_WRITING));
		if (busy || read_blocked)
			continue;

		/*
		 * NOTE(review): this assignment replaces ALL flags of the
		 * socket rather than adding the suspended flag - confirm
		 * that dropping the other flags is intended.
		 */
		sock->flags = SOCK_FLG_OP_SUSPENDED;
		debug_sock_print("resuming op on sock %ld\n",
				get_sock_num(sock));

		sock->mess = cur->m;
		socket_request_socket(sock, &sock->mess);

		mq_dequeue(cur);
		mq_free(cur);
		return;
	}
}
/*
 * Generic select handler for packet sockets. The requested select operations
 * are taken from the USER_ENDPT field of the request; the reply status is the
 * set of operations that are ready right now.
 *
 * While an operation is pending on the socket nothing is reported as ready
 * (status 0, which is not an error); if notification was requested, the read
 * and write interests are recorded in the socket flags. Otherwise read is
 * ready when the receive queue is not empty, and write is always ready.
 */
void generic_op_select(struct socket * sock, message * m)
{
	int ready = 0;
	int want, watch;

	debug_print("socket num %ld 0x%x", get_sock_num(sock), m->USER_ENDPT);

	want = m->USER_ENDPT;
	watch = want & SEL_NOTIFY;

	/* in this case any operation would block, no error */
	if (sock->flags & SOCK_FLG_OP_PENDING) {
		if (watch && (want & SEL_RD))
			sock->flags |= SOCK_FLG_SEL_READ;
		if (watch && (want & SEL_WR))
			sock->flags |= SOCK_FLG_SEL_WRITE;
		/* FIXME we do not monitor error */
		send_reply(m, 0);
		return;
	}

	if (want & SEL_RD) {
		if (sock->recv_head)
			ready |= SEL_RD;
		else if (watch)
			sock->flags |= SOCK_FLG_SEL_READ;
	}

	/* FIXME generic packet socket never blocks on write */
	if (want & SEL_WR)
		ready |= SEL_WR;

	/* FIXME SEL_ERR is ignored, we do not generate exceptions */
	send_reply(m, ready);
}
/*
 * Generic select-reply handler for packet sockets. Adds SEL_RD to the
 * DEV_SEL_OPS field of m when the socket has a recorded read interest and its
 * receive queue is not empty. If m then reports any ready operation, all
 * recorded select interests of the socket are cleared.
 *
 * Nothing is reported while the socket is still pending or reviving.
 * Note that m is written to even though the parameter is marked __unused.
 */
void generic_op_select_reply(struct socket * sock, __unused message * m)
{
	assert(sock->select_ep != NONE);
	debug_print("socket num %ld", get_sock_num(sock));

	/* unused for generic packet socket, see generic_op_select() */
	assert((sock->flags & (SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_ERROR)) == 0);

	if ((sock->flags &
		(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_REVIVING)) != 0) {
		debug_print("WARNING socket still blocking!");
		return;
	}

	if ((sock->flags & SOCK_FLG_SEL_READ) && sock->recv_head)
		m->DEV_SEL_OPS |= SEL_RD;

	if (!m->DEV_SEL_OPS)
		return;

	sock->flags &= ~(SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_READ |
						SOCK_FLG_SEL_ERROR);
}