minix/minix/servers/vfs/select.c

1106 lines
35 KiB
C
Raw Normal View History

/* Implement entry point to select system call.
*
* The entry points into this file are
* do_select: perform the SELECT system call
2012-02-13 16:28:04 +01:00
* select_callback: notify select system of possible fd operation
endpoint-aware conversion of servers. 'who', indicating caller number in pm and fs and some other servers, has been removed in favour of 'who_e' (endpoint) and 'who_p' (proc nr.). In both PM and FS, isokendpt() convert endpoints to process slot numbers, returning OK if it was a valid and consistent endpoint number. okendpt() does the same but panic()s if it doesn't succeed. (In PM, this is pm_isok..) pm and fs keep their own records of process endpoints in their proc tables, which are needed to make kernel calls about those processes. message field names have changed. fs drivers are endpoints. fs now doesn't try to get out of driver deadlock, as the protocol isn't supposed to let that happen any more. (A warning is printed if ELOCKED is detected though.) fproc[].fp_task (indicating which driver the process is suspended on) became an int. PM and FS now get endpoint numbers of initial boot processes from the kernel. These happen to be the same as the old proc numbers, to let user processes reach them with the old numbers, but FS and PM don't know that. All new processes after INIT, even after the generation number wraps around, get endpoint numbers with generation 1 and higher, so the first instances of the boot processes are the only processes ever to have endpoint numbers in the old proc number range. More return code checks of sys_* functions have been added. IS has become endpoint-aware. Ditched the 'text' and 'data' fields in the kernel dump (which show locations, not sizes, so aren't terribly useful) in favour of the endpoint number. Proc number is still visible. Some other dumps (e.g. dmap, rs) show endpoint numbers now too which got the formatting changed. PM reading segments using rw_seg() has changed - it uses other fields in the message now instead of encoding the segment and process number and fd in the fd field. For that it uses _read_pm() and _write_pm() which to _taskcall()s directly in pm/misc.c. 
PM now sys_exit()s itself on panic(), instead of sys_abort(). RS also talks in endpoints instead of process numbers.
2006-03-03 11:20:58 +01:00
* select_unsuspend_by_endpt: cancel a blocking select on exiting driver
*
* The select code uses minimal locking, so that the replies from character
* drivers can be processed without blocking. Filps are locked only for pipes.
* We make the assumption that any other structures and fields are safe to
* check (and possibly change) as long as we know that a process is blocked on
* a select(2) call, meaning that all involved filps are guaranteed to stay
* open until either we finish the select call, or the process gets interrupted
* by a signal.
*/
#include "fs.h"
#include <sys/fcntl.h>
#include <sys/time.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <minix/callnr.h>
#include <minix/u64.h>
#include <string.h>
#include <assert.h>
#include "file.h"
#include "vnode.h"
/* max. number of simultaneously pending select() calls */
#define MAXSELECTS 25
#define FROM_PROC 0
#define TO_PROC 1
2012-03-25 20:25:53 +02:00
/* Table of pending select() calls; one entry describes one call. */
static struct selectentry {
	struct fproc *requestor;	/* NULL exactly when the slot is unused */
	endpoint_t req_endpt;		/* endpoint of the process that called */

	/* Descriptor sets belonging to this call */
	fd_set readfds;
	fd_set writefds;
	fd_set errorfds;

	/* Result sets (presumably filled in as descriptors turn out ready) */
	fd_set ready_readfds;
	fd_set ready_writefds;
	fd_set ready_errorfds;

	/* Set pointers exactly as they arrived in the request message */
	fd_set *vir_readfds;
	fd_set *vir_writefds;
	fd_set *vir_errorfds;

	struct filp *filps[OPEN_MAX];	/* filp looked up for each selected fd */
	int type[OPEN_MAX];		/* per fd: index into fdtypes[], or -1 */

	int nfds;			/* highest fd with a known type, plus one */
	int nreadyfds;			/* nonzero once there are results to return */
	int error;			/* status of a failed select request, if any */

	char block;			/* nonzero if the call is allowed to block */
	char starting;			/* TRUE while requests are still being issued */

	clock_t expiry;			/* 0 as long as no timer has been set */
	minix_timer_t timer;		/* only meaningful if expiry > 0 */
} selecttab[MAXSELECTS];
/* Forward declarations of the file-local helpers.
 *
 * Stray revision-history timestamps that had ended up between these
 * prototypes were not valid C and have been removed; the prototypes
 * themselves are unchanged.
 */

/* Transfer fd sets for a select entry; 'direction' is FROM_PROC or TO_PROC. */
static int copy_fdsets(struct selectentry *se, int nfds, int direction);

static void filp_status(struct filp *fp, int status);
static int is_deferred(struct selectentry *se);
static void restart_proc(struct selectentry *se);

/* Record the given operations for 'fd' in the select entry. */
static void ops2tab(int ops, int fd, struct selectentry *e);

/* File type predicates; these are the type_match hooks in fdtypes[]. */
static int is_regular_file(struct filp *f);
static int is_pipe(struct filp *f);
static int is_char_device(struct filp *f);

static void select_lock_filp(struct filp *f, int ops);

/* Per-type request handlers; these are the select_request hooks in
 * fdtypes[]. '*ops' is both input and output: a handler may turn operations
 * on or off.
 */
static int select_request_file(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_pipe(struct filp *f, int *ops, int block,
	struct fproc *rfp);

static void select_cancel_all(struct selectentry *e);
static void select_cancel_filp(struct filp *f);
static void select_return(struct selectentry *);
static void select_restart_filps(void);

/* Operations selected for 'fd' in the entry; zero if none. */
static int tab2ops(int fd, struct selectentry *e);

/* Clear whatever a previous use left behind in the entry. */
static void wipe_select(struct selectentry *s);
/* Table of file types that select() can handle.
 *
 * Each row pairs a predicate that recognizes a filp of that type with the
 * handler that issues the select request for it. do_select() walks the rows
 * in order and uses the first one whose type_match() accepts the filp, so
 * the order of the rows matters. A filp that no row accepts is rejected with
 * EBADF.
 *
 * A stray revision-history timestamp that had ended up between the struct
 * body and its closing brace was not valid C and has been removed.
 */
static struct fdtype {
	/* Issue a select request on the filp; '*ops' is updated by the handler */
	int (*select_request)(struct filp *, int *ops, int block,
		struct fproc *rfp);
	/* Return nonzero if the filp is of this type */
	int (*type_match)(struct filp *f);
} fdtypes[] = {
	{ select_request_char, is_char_device },
	{ select_request_file, is_regular_file },
	{ select_request_pipe, is_pipe },
};
#define SEL_FDS (sizeof(fdtypes) / sizeof(fdtypes[0]))
/*===========================================================================*
* do_select *
*===========================================================================*/
int do_select(void)
{
/* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
* call. First we copy the arguments and verify their sanity. Then we check
* whether there are file descriptors that satisfy the select call right off
* the bat. If so, or if there are no ready file descriptors but the process
* requested to return immediately, we return the result. Otherwise we set a
2012-02-13 16:28:04 +01:00
* timeout and wait for either the file descriptors to become ready or the
* timer to go off. If no timeout value was provided, we wait indefinitely.
*/
2010-03-22 21:43:06 +01:00
int r, nfds, do_timeout = 0, fd, s;
struct filp *f;
unsigned int type, ops;
struct timeval timeout;
struct selectentry *se;
vir_bytes vtimeout;
nfds = job_m_in.m_lc_vfs_select.nfds;
vtimeout = job_m_in.m_lc_vfs_select.timeout;
/* Sane amount of file descriptors? */
if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
/* Find a slot to store this select request */
for (s = 0; s < MAXSELECTS; s++)
if (selecttab[s].requestor == NULL) /* Unused slot */
break;
if (s >= MAXSELECTS) return(ENOSPC);
se = &selecttab[s];
wipe_select(se); /* Clear results of previous usage */
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
se->requestor = fp;
se->req_endpt = who_e;
se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;
/* Copy fdsets from the process */
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
se->requestor = NULL;
return(r);
}
/* Did the process set a timeout value? If so, retrieve it. */
if (vtimeout != 0) {
2012-02-13 16:28:04 +01:00
do_timeout = 1;
r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
sizeof(timeout));
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
if (r != OK) {
se->requestor = NULL;
return(r);
}
}
/* No nonsense in the timeval */
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) {
se->requestor = NULL;
return(EINVAL);
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
}
/* If there is no timeout, we block forever. Otherwise, we block up to the
2012-02-13 16:28:04 +01:00
* specified time interval.
*/
if (!do_timeout) /* No timeout value set */
se->block = 1;
else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
2012-02-13 16:28:04 +01:00
se->block = 1;
else /* timeout set as (0,0) - this effects a poll */
2012-02-13 16:28:04 +01:00
se->block = 0;
se->expiry = 0; /* no timer set (yet) */
/* We are going to lock filps, and that means that while locking a second
* filp, we might already get the results for the first one. In that case,
* the incoming results must not cause the select call to finish prematurely.
*/
se->starting = TRUE;
/* Verify that file descriptors are okay to select on */
for (fd = 0; fd < nfds; fd++) {
/* Because the select() interface implicitly includes file descriptors
* you might not want to select on, we have to figure out whether we're
* interested in them. Typically, these file descriptors include fd's
* inherited from the parent proc and file descriptors that have been
* close()d, but had a lower fd than one in the current set.
*/
2012-02-13 16:28:04 +01:00
if (!(ops = tab2ops(fd, se)))
continue; /* No operations set; nothing to do for this fd */
/* Get filp belonging to this fd */
2012-02-13 16:28:04 +01:00
f = se->filps[fd] = get_filp(fd, VNODE_READ);
if (f == NULL) {
2012-02-13 16:28:04 +01:00
if (err_code == EBADF)
r = err_code;
else /* File descriptor is 'ready' to return EIO */
r = EINTR;
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
se->requestor = NULL;
return(r);
2012-02-13 16:28:04 +01:00
}
/* Check file types. According to POSIX 2008:
* "The pselect() and select() functions shall support regular files,
* terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
* behavior of pselect() and select() on file descriptors that refer to
* other types of file is unspecified."
*
* In our case, terminal and pseudo-terminal devices are handled by the
* TTY major and sockets by either INET major (socket type AF_INET) or
* UDS major (socket type AF_UNIX). Additionally, we give other
* character drivers the chance to handle select for any of their
* device nodes. Some may not implement support for select and let
* libchardriver return EBADF, which we then pass to the calling
* process once we receive the reply.
*/
se->type[fd] = -1;
for (type = 0; type < SEL_FDS; type++) {
if (fdtypes[type].type_match(f)) {
se->type[fd] = type;
se->nfds = fd+1;
se->filps[fd]->filp_selectors++;
break;
}
}
2012-02-13 16:28:04 +01:00
unlock_filp(f);
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
if (se->type[fd] == -1) { /* Type not found */
se->requestor = NULL;
return(EBADF);
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after succesfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled there was opportunity that these entries were overwritten. This had as effect that some select results were ignored (and select() remained blocking instead if returning) or do_select tried to access filps that were not present (because thrown away by secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
}
}
/* Check all file descriptors in the set whether one is 'ready' now */
for (fd = 0; fd < nfds; fd++) {
/* Again, check for involuntarily selected fd's */
2012-02-13 16:28:04 +01:00
if (!(ops = tab2ops(fd, se)))
continue; /* No operations set; nothing to do for this fd */
/* File descriptors selected for reading that are not opened for
* reading should be marked as readable, as read calls would fail
* immediately. The same applies to writing.
*/
f = se->filps[fd];
if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
ops2tab(SEL_RD, fd, se);
ops &= ~SEL_RD;
}
if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
ops2tab(SEL_WR, fd, se);
ops &= ~SEL_WR;
}
2012-02-13 16:28:04 +01:00
/* Test filp for select operations if not already done so. e.g.,
* processes sharing a filp and both doing a select on that filp. */
if ((f->filp_select_ops & ops) != ops) {
int wantops;
wantops = (f->filp_select_ops |= ops);
type = se->type[fd];
select_lock_filp(f, wantops);
r = fdtypes[type].select_request(f, &wantops, se->block, fp);
unlock_filp(f);
if (r != OK && r != SUSPEND) {
se->error = r;
break; /* Error or bogus return code; abort */
}
2012-02-13 16:28:04 +01:00
/* The select request above might have turned on/off some
* operations because they were 'ready' or not meaningful.
* Either way, we might have a result and we need to store them
* in the select table entry. */
if (wantops & ops) ops2tab(wantops, fd, se);
2012-02-13 16:28:04 +01:00
}
}
/* At this point there won't be any blocking calls anymore. */
se->starting = FALSE;
if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
!is_deferred(se)) {
/* An error occurred, or fd's were found that were ready to go right
* away, and/or we were instructed not to block at all. Must return
* immediately. Do not copy FD sets if an error occurred.
*/
if (se->error != OK)
r = se->error;
else
r = copy_fdsets(se, se->nfds, TO_PROC);
select_cancel_all(se);
VFS: make all IPC asynchronous By decoupling synchronous drivers from VFS, we are a big step closer to supporting driver crashes under all circumstances. That is, VFS can't become stuck on IPC with a synchronous driver (e.g., INET) and can recover from crashing block drivers during open/close/ioctl or during communication with an FS. In order to maintain serialized communication with a synchronous driver, the communication is wrapped by a mutex on a per driver basis (not major numbers as there can be multiple majors with identical endpoints). Majors that share a driver endpoint point to a single mutex object. In order to support crashes from block drivers, the file reopen tactic had to be changed; first reopen files associated with the crashed driver, then send the new driver endpoint to FSes. This solves a deadlock between the FS and the block driver; - VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it after retrying the current request to the newly started driver. - The block driver would refuse the retried request until all files had been reopened. - VFS would reopen files only after getting a reply from the initial REQ_NEW_DRIVER. When a character special driver crashes, all associated files have to be marked invalid and closed (or reopened if flagged as such). However, they can only be closed if a thread holds exclusive access to it. To obtain exclusive access, the worker thread (which handles the new driver endpoint event from DS) schedules a new job to garbage collect invalid files. This way, we can signal the worker thread that was talking to the crashed driver and will release exclusive access to a file associated with the crashed driver and prevent the garbage collecting worker thread from dead locking on that file. Also, when a character special driver crashes, RS will unmap the driver and remap it upon restart. 
During unmapping, associated files are marked invalid instead of waiting for an endpoint up event from DS, as that event might come later than new read/write/select requests and thus cause confusion in the freshly started driver. When locking a filp, the usage counters are no longer checked. The usage counter can legally go down to zero during filp invalidation while there are locks pending. DS events are handled by a separate worker thread instead of the main thread as reopening files could lead to another crash and a stuck thread. An additional worker thread is then necessary to unlock it. Finally, with everything asynchronous a race condition in do_select surfaced. A select entry was only marked in use after successfully sending initial select requests to drivers and having to wait. When multiple select() calls were handled, there was an opportunity for these entries to be overwritten. The effect was that some select results were ignored (and select() remained blocking instead of returning) or do_select tried to access filps that were not present (because they had been thrown away by a secondary select()). This bug manifested itself with sendrecs, but was very hard to reproduce. However, it became awfully easy to trigger with asynsends only.
2012-08-28 16:06:51 +02:00
se->requestor = NULL;
if (r != OK)
return(r);
return(se->nreadyfds);
}
2012-02-13 16:28:04 +01:00
/* Convert timeval to ticks and set the timer. If it fails, undo
* all, return error.
*/
if (do_timeout) {
int ticks;
/* Open Group:
* "If the requested timeout interval requires a finer
* granularity than the implementation supports, the
* actual timeout interval shall be rounded up to the next
* supported value."
*/
#define USECPERSEC 1000000
while(timeout.tv_usec >= USECPERSEC) {
/* this is to avoid overflow with *system_hz below */
timeout.tv_usec -= USECPERSEC;
timeout.tv_sec++;
}
ticks = timeout.tv_sec * system_hz +
(timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
se->expiry = ticks;
2010-07-09 14:58:18 +02:00
set_timer(&se->timer, ticks, select_timeout_check, s);
}
/* process now blocked */
suspend(FP_BLOCKED_ON_SELECT);
return(SUSPEND);
}
/*===========================================================================*
* is_deferred *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int is_deferred(struct selectentry *se)
{
/* Tell whether the given select call cannot be completed yet. That is the
 * case while the call is still being set up (se->starting), or while any of
 * its filps has FSF_UPDATE or FSF_BUSY set, i.e., a request to a driver still
 * has to be sent or its initial reply is still outstanding. Returns TRUE if
 * the call is deferred, FALSE otherwise.
 */
  struct filp *filp;
  int i;

  /* A call that has not finished its initialization is always deferred. */
  if (se->starting)
	return(TRUE);

  for (i = 0; i < se->nfds; i++) {
	filp = se->filps[i];
	if (filp != NULL &&
	    (filp->filp_select_flags & (FSF_UPDATE|FSF_BUSY)))
		return(TRUE);
  }

  return(FALSE);
}
/*===========================================================================*
* is_regular_file *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int is_regular_file(struct filp *f)
{
2012-04-25 14:44:42 +02:00
return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
}
/*===========================================================================*
* is_pipe *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int is_pipe(struct filp *f)
{
/* Return 1 if the filp refers to a FIFO, which covers both anonymous pipes and
 * named pipes. Return 0 if it does not, or if there is no filp or no vnode.
 */
  if (f == NULL || f->filp_vno == NULL)
	return(0);

  return(S_ISFIFO(f->filp_vno->v_mode) ? 1 : 0);
}
/*===========================================================================*
* is_char_device *
*===========================================================================*/
static int is_char_device(struct filp *f)
{
/* Return 1 if the filp is a handle on a character device, and 0 if it is not
 * or if there is no filp or no vnode. This function MUST NOT block its
 * calling thread. The given filp may or may not be locked.
 */
  if (f == NULL || f->filp_vno == NULL)
	return(0);

  return(S_ISCHR(f->filp_vno->v_mode) ? 1 : 0);
}
2008-02-22 16:46:59 +01:00
/*===========================================================================*
* select_request_char *
2008-02-22 16:46:59 +01:00
*===========================================================================*/
static int select_request_char(struct filp *f, int *ops, int block,
struct fproc *rfp)
2008-02-22 16:46:59 +01:00
{
/* Check readiness status on a character device. Unless suitable results are
* available right now, this will only initiate the polling process, causing
* result processing to be deferred. This function MUST NOT block its calling
* thread. The given filp may or may not be locked.
*/
dev_t dev;
int r, rops;
struct dmap *dp;
2008-02-22 16:46:59 +01:00
/* Start by remapping the device node number to a "real" device number. Those
* two are different only for CTTY_MAJOR aka /dev/tty, but that one single
* exception requires quite some extra effort here: the select code matches
* character driver replies to their requests based on the device number, so
* it needs to be aware that device numbers may be mapped. The idea is to
* perform the mapping once and store the result in the filp object, so that
* at least we don't run into problems when a process loses its controlling
* terminal while doing a select (see also free_proc). It should be noted
* that it is possible that multiple processes share the same /dev/tty filp,
* and they may not all have a controlling terminal. The ctty-less processes
* should never pass the mapping; a more problematic case is checked below.
*
* The cdev_map call also checks the major number for rough validity, so that
* we can use it to index the dmap array safely a bit later.
*/
if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
return(ENXIO);
if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) {
/* Currently, this case can occur as follows: a process with a
* controlling terminal opens /dev/tty and forks, the new child starts
* a new session, opens a new controlling terminal, and both parent and
* child call select on the /dev/tty file descriptor. If this case ever
* becomes real, a better solution may be to force-close a filp for
* /dev/tty when a new controlling terminal is opened.
*/
printf("VFS: file pointer has multiple controlling TTYs!\n");
return(EIO);
}
f->filp_char_select_dev = dev; /* set before possibly suspending */
rops = *ops;
/* By default, nothing to do */
*ops = 0;
if (!block && (f->filp_select_flags & FSF_BLOCKED)) {
/* This filp is blocked waiting for a reply, but we don't want to
* block ourselves. Unless we're awaiting the initial reply, these
* operations won't be ready */
if (!(f->filp_select_flags & FSF_BUSY)) {
if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
rops &= ~SEL_RD;
if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
rops &= ~SEL_WR;
if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
rops &= ~SEL_ERR;
if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
return(OK);
}
}
f->filp_select_flags |= FSF_UPDATE;
if (block) {
rops |= SEL_NOTIFY;
if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK;
if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK;
if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK;
}
2008-02-22 16:46:59 +01:00
if (f->filp_select_flags & FSF_BUSY)
return(SUSPEND);
2008-02-22 16:46:59 +01:00
dp = &dmap[major(dev)];
if (dp->dmap_sel_busy)
return(SUSPEND);
2008-02-22 16:46:59 +01:00
f->filp_select_flags &= ~FSF_UPDATE;
r = cdev_select(dev, rops);
if (r != OK)
return(r);
dp->dmap_sel_busy = TRUE;
dp->dmap_sel_filp = f;
f->filp_select_flags |= FSF_BUSY;
return(SUSPEND);
2008-02-22 16:46:59 +01:00
}
/*===========================================================================*
* select_request_file *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int select_request_file(
	struct filp *UNUSED(f),
	int *UNUSED(ops),
	int UNUSED(block),
	struct fproc *UNUSED(rfp)
)
{
/* Regular files are always ready. None of the arguments is used, so *ops is
 * left exactly as the caller passed it in, and every requested operation is
 * thereby reported as ready. Always returns OK.
 */
  return(OK);
}
/*===========================================================================*
* select_request_pipe *
*===========================================================================*/
static int select_request_pipe(struct filp *f, int *ops, int block,
	struct fproc *UNUSED(rfp))
{
/* Check readiness status on a pipe. The given filp is locked. This function
 * may block its calling thread if necessary.
 *
 * On input, *ops holds the requested SEL_* operations. On output, it holds
 * the subset of those operations that is ready right now. If nothing is
 * ready and the caller wants to block, the requested operations are recorded
 * in the filp's pipe select operations. Always returns OK.
 */
  const int wanted = *ops;
  int ready = 0;
  int status;

  if (wanted & (SEL_RD|SEL_ERR)) {
	/* Would reading a single byte suspend the caller? */
	status = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
		1 /* Check only */);

	if (status != SUSPEND) {
		ready |= SEL_RD;
		if (status < 0)
			ready |= SEL_ERR;
	}
  }

  if (wanted & (SEL_WR|SEL_ERR)) {
	/* Would writing a single byte suspend the caller? */
	status = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
		1 /* Check only */);

	if (status != SUSPEND) {
		ready |= SEL_WR;
		if (status < 0)
			ready |= SEL_ERR;
	}
  }

  /* Report only those ready operations that were actually asked for. */
  ready &= wanted;
  *ops = ready;

  if (ready == 0 && block)
	f->filp_pipe_select_ops |= wanted;

  return(OK);
}
2005-09-22 23:17:22 +02:00
/*===========================================================================*
* tab2ops *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int tab2ops(int fd, struct selectentry *e)
{
/* Return the SEL_RD, SEL_WR and SEL_ERR operations requested for file
 * descriptor 'fd' in select entry 'e', based on the entry's read, write and
 * error FD sets. The result is zero if the descriptor is in none of the sets.
 */
  int ops = 0;

  if (FD_ISSET(fd, &e->readfds))  ops |= SEL_RD;
  if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
  if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;

  return(ops);
}
2005-09-22 23:17:22 +02:00
/*===========================================================================*
* ops2tab *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void ops2tab(int ops, int fd, struct selectentry *e)
{
/* Record the given SEL_* operations as ready for file descriptor 'fd' in
 * select entry 'e'. An operation is recorded only if the caller supplied the
 * corresponding set and requested the descriptor in it. The ready counter is
 * bumped only the first time a bit is set, so that repeated reports are not
 * counted twice.
 */
  if (ops & SEL_RD) {
	if (e->vir_readfds && FD_ISSET(fd, &e->readfds)) {
		if (!FD_ISSET(fd, &e->ready_readfds)) {
			FD_SET(fd, &e->ready_readfds);
			e->nreadyfds++;
		}
	}
  }

  if (ops & SEL_WR) {
	if (e->vir_writefds && FD_ISSET(fd, &e->writefds)) {
		if (!FD_ISSET(fd, &e->ready_writefds)) {
			FD_SET(fd, &e->ready_writefds);
			e->nreadyfds++;
		}
	}
  }

  if (ops & SEL_ERR) {
	if (e->vir_errorfds && FD_ISSET(fd, &e->errorfds)) {
		if (!FD_ISSET(fd, &e->ready_errorfds)) {
			FD_SET(fd, &e->ready_errorfds);
			e->nreadyfds++;
		}
	}
  }
}
2005-09-22 23:17:22 +02:00
/*===========================================================================*
* copy_fdsets *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static int copy_fdsets(struct selectentry *se, int nfds, int direction)
{
/* Copy FD sets from or to the user process calling select(2). This function
* MUST NOT block the calling thread.
*/
int r;
size_t fd_setsize;
endpoint_t src_e, dst_e;
fd_set *src_fds, *dst_fds;
if (nfds < 0 || nfds > OPEN_MAX)
panic("select copy_fdsets: nfds wrong: %d", nfds);
/* Only copy back as many bits as the user expects. */
Build NetBSD libc library in world in ELF mode. 3 sets of libraries are built now: . ack: all libraries that ack can compile (/usr/lib/i386/) . clang+elf: all libraries with minix headers (/usr/lib/) . clang+elf: all libraries with netbsd headers (/usr/netbsd/) Once everything can be compiled with netbsd libraries and headers, the /usr/netbsd hierarchy will be obsolete and its libraries compiled with netbsd headers will be installed in /usr/lib, and its headers in /usr/include. (i.e. minix libc and current minix headers set will be gone.) To use the NetBSD libc system (libraries + headers) before it is the default libc, see: http://wiki.minix3.org/en/DevelopersGuide/UsingNetBSDCode This wiki page also documents the maintenance of the patch files of minix-specific changes to imported NetBSD code. Changes in this commit: . libsys: Add NBSD compilation and create a safe NBSD-based libc. . Port rest of libraries (except libddekit) to new header system. . Enable compilation of libddekit with new headers. . Enable kernel compilation with new headers. . Enable drivers compilation with new headers. . Port legacy commands to new headers and libc. . Port servers to new headers. . Add <sys/sigcontext.h> in compat library. . Remove dependency file in tree. . Enable compilation of common/lib/libc/atomic in libsys . Do not generate RCSID strings in libc. . Temporarily disable zoneinfo as they are incompatible with NetBSD format . obj-nbsd for .gitignore . Procfs: use only integer arithmetic. (Antoine Leca) . Increase ramdisk size to create NBSD-based images. . Remove INCSYMLINKS handling hack. . Add nbsd_include/sys/exec_elf.h . Enable ELF compilation with NBSD libc. . Add 'make nbsdsrc' in tools to download reference NetBSD sources. . Automate minix-port.patch creation. . Avoid using fstavfs() as it is *extremely* slow and unneeded. . Set err() as PRIVATE to avoid name clash with libc. . [NBSD] servers/vm: remove compilation warnings. . u32 is not a long in NBSD headers. . 
UPDATING info on netbsd hierarchy . commands fixes for netbsd libc
2011-04-27 15:00:52 +02:00
fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
/* Set source and destination endpoints */
src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
/* read set */
src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
if (se->vir_readfds) {
make vfs & filesystems use failable copying Change the kernel to add features to vircopy and safecopies so that transparent copy fixing won't happen to avoid deadlocks, and such copies fail with EFAULT. Transparently making copying work from filesystems (as normally done by the kernel & VM when copying fails because of missing/readonly memory) is problematic as it can happen that, for file-mapped ranges, that that same filesystem that is blocked on the copy request is needed to satisfy the memory range, leading to deadlock. Dito for VFS itself, if done with a blocking call. This change makes the copying done from a filesystem fail in such cases with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call fails with EFAULT, VFS will then request the range to be made available to VM after the FS is unblocked, allowing it to be used to satisfy the range if need be in another VFS thread. Similarly, for datacopies that VFS itself does, it uses the failable vircopy variant and callers use a wrapper that talk to VM if necessary to get the copy to work. . kernel: add CPF_TRY flag to safecopies . kernel: only request writable ranges to VM for the target buffer when copying fails . do copying in VFS TRY-first . some fixes in VM to build SANITYCHECK mode . add regression test for the cases where - a FS system call needs memory mapped in a process that the FS itself must map. - such a range covers more than one file-mapped region. . add 'try' mode to vircopy, physcopy . add flags field to copy kernel call messages . if CP_FLAG_TRY is set, do not transparently try to fix memory ranges . for use by VFS when accessing user buffers to avoid deadlock . remove some obsolete backwards compatability assignments . VFS: let thread scheduling work for VM requests too Allows VFS to make calls to VM while suspending and resuming the currently running thread. Does currently not work for the main thread. . 
VM: add fix memory range call for use by VFS Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
(vir_bytes) dst_fds, fd_setsize);
if (r != OK) return(r);
}
/* write set */
src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
if (se->vir_writefds) {
make vfs & filesystems use failable copying Change the kernel to add features to vircopy and safecopies so that transparent copy fixing won't happen to avoid deadlocks, and such copies fail with EFAULT. Transparently making copying work from filesystems (as normally done by the kernel & VM when copying fails because of missing/readonly memory) is problematic as it can happen that, for file-mapped ranges, that that same filesystem that is blocked on the copy request is needed to satisfy the memory range, leading to deadlock. Dito for VFS itself, if done with a blocking call. This change makes the copying done from a filesystem fail in such cases with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call fails with EFAULT, VFS will then request the range to be made available to VM after the FS is unblocked, allowing it to be used to satisfy the range if need be in another VFS thread. Similarly, for datacopies that VFS itself does, it uses the failable vircopy variant and callers use a wrapper that talk to VM if necessary to get the copy to work. . kernel: add CPF_TRY flag to safecopies . kernel: only request writable ranges to VM for the target buffer when copying fails . do copying in VFS TRY-first . some fixes in VM to build SANITYCHECK mode . add regression test for the cases where - a FS system call needs memory mapped in a process that the FS itself must map. - such a range covers more than one file-mapped region. . add 'try' mode to vircopy, physcopy . add flags field to copy kernel call messages . if CP_FLAG_TRY is set, do not transparently try to fix memory ranges . for use by VFS when accessing user buffers to avoid deadlock . remove some obsolete backwards compatability assignments . VFS: let thread scheduling work for VM requests too Allows VFS to make calls to VM while suspending and resuming the currently running thread. Does currently not work for the main thread. . 
VM: add fix memory range call for use by VFS Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
(vir_bytes) dst_fds, fd_setsize);
if (r != OK) return(r);
}
/* error set */
src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
if (se->vir_errorfds) {
make vfs & filesystems use failable copying Change the kernel to add features to vircopy and safecopies so that transparent copy fixing won't happen to avoid deadlocks, and such copies fail with EFAULT. Transparently making copying work from filesystems (as normally done by the kernel & VM when copying fails because of missing/readonly memory) is problematic as it can happen that, for file-mapped ranges, that that same filesystem that is blocked on the copy request is needed to satisfy the memory range, leading to deadlock. Dito for VFS itself, if done with a blocking call. This change makes the copying done from a filesystem fail in such cases with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call fails with EFAULT, VFS will then request the range to be made available to VM after the FS is unblocked, allowing it to be used to satisfy the range if need be in another VFS thread. Similarly, for datacopies that VFS itself does, it uses the failable vircopy variant and callers use a wrapper that talk to VM if necessary to get the copy to work. . kernel: add CPF_TRY flag to safecopies . kernel: only request writable ranges to VM for the target buffer when copying fails . do copying in VFS TRY-first . some fixes in VM to build SANITYCHECK mode . add regression test for the cases where - a FS system call needs memory mapped in a process that the FS itself must map. - such a range covers more than one file-mapped region. . add 'try' mode to vircopy, physcopy . add flags field to copy kernel call messages . if CP_FLAG_TRY is set, do not transparently try to fix memory ranges . for use by VFS when accessing user buffers to avoid deadlock . remove some obsolete backwards compatability assignments . VFS: let thread scheduling work for VM requests too Allows VFS to make calls to VM while suspending and resuming the currently running thread. Does currently not work for the main thread. . 
VM: add fix memory range call for use by VFS Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
(vir_bytes) dst_fds, fd_setsize);
if (r != OK) return(r);
}
return(OK);
}
2005-09-22 23:17:22 +02:00
/*===========================================================================*
* select_cancel_all *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void select_cancel_all(struct selectentry *se)
{
/* Cancel select, possibly on success. Decrease select usage and cancel timer.
 * This function MUST NOT block its calling thread.
 *
 * Every filp still referenced by the entry is detached from it and has its
 * select usage dropped. A pending timeout timer is cancelled. Finally, the
 * entry's requestor is cleared.
 */
  int fd;
  struct filp *f;

  for (fd = 0; fd < se->nfds; fd++) {
	if ((f = se->filps[fd]) == NULL)
		continue;
	se->filps[fd] = NULL;
	select_cancel_filp(f);
  }

  /* A timer was set only if an expiry time was recorded. */
  if (se->expiry > 0) {
	cancel_timer(&se->timer);
	se->expiry = 0;
  }

  se->requestor = NULL;
}
/*===========================================================================*
* select_cancel_filp *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void select_cancel_filp(struct filp *f)
{
/* Reduce the number of select users of this filp. This function MUST NOT block
* its calling thread.
*/
devmajor_t major;
assert(f);
assert(f->filp_selectors > 0);
assert(f->filp_count > 0);
2012-02-13 16:28:04 +01:00
f->filp_selectors--;
2012-02-13 16:28:04 +01:00
if (f->filp_selectors == 0) {
/* No one selecting on this filp anymore, forget about select state */
f->filp_select_ops = 0;
f->filp_select_flags = 0;
f->filp_pipe_select_ops = 0;
/* If this filp is the subject of an ongoing select query to a
* character device, mark the query as stale, so that this filp will
* not be checked when the result arrives. The filp select device may
* still be NO_DEV if do_select fails on the initial fd check.
*/
if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) {
major = major(f->filp_char_select_dev);
if (dmap[major].dmap_sel_busy &&
dmap[major].dmap_sel_filp == f)
dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
f->filp_char_select_dev = NO_DEV;
}
}
}
/*===========================================================================*
* select_return *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void select_return(struct selectentry *se)
{
/* Return the results of a select call to the user process and revive the
* process. This function MUST NOT block its calling thread.
*/
int r;
assert(!is_deferred(se)); /* Not done yet, first wait for async reply */
select_cancel_all(se);
2012-02-13 16:28:04 +01:00
if (se->error != OK)
2012-02-13 16:28:04 +01:00
r = se->error;
else
r = copy_fdsets(se, se->nfds, TO_PROC);
if (r == OK)
2012-02-13 16:28:04 +01:00
r = se->nreadyfds;
revive(se->req_endpt, r);
}
/*===========================================================================*
* select_callback *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void select_callback(struct filp *f, int status)
{
/* Entry point for reporting that the status of filp 'f' has changed. 'status'
 * carries the operations that are now ready, or an error. At present only the
 * pipe code calls this function, and it does so while holding the lock to the
 * filp. All work is delegated to filp_status.
 */
  filp_status(f, status);
}
/*===========================================================================*
* init_select *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void init_select(void)
{
int s;
for (s = 0; s < MAXSELECTS; s++)
init_timer(&selecttab[s].timer);
}
/*===========================================================================*
2005-09-11 18:45:46 +02:00
* select_forget *
*===========================================================================*/
void select_forget(void)
{
/* The process associated with the calling thread is about to be unpaused,
 * because a signal is supposed to interrupt its current system call. Drop its
 * select() call altogether, if it has one. This function is allowed to block
 * its calling thread, although it does not actually do so.
 */
  struct selectentry *se;
  int slot;

  for (slot = 0; slot < MAXSELECTS; slot++) {
	se = &selecttab[slot];
	if (se->requestor != fp)
		continue;

	/* Found the caller's entry. It must be past its setup phase. */
	assert(se->starting == FALSE);

	/* Deliberately no is_deferred test: ongoing queries can be cancelled
	 * safely.
	 */
	select_cancel_all(se);
	return;
  }

  /* No entry found for this process; nothing to forget. */
}
/*===========================================================================*
2005-09-11 18:45:46 +02:00
* select_timeout_check *
*===========================================================================*/
void select_timeout_check(minix_timer_t *timer)
{
/* An alarm has gone off for one of the select queries. This function MUST NOT
 * block its calling thread.
 *
 * The timer's integer argument is the index of the select table entry. If
 * the entry is in use and had a timeout pending, the select call is completed
 * right away, unless it is still deferred. In that case it is turned into a
 * nonblocking call, so that it completes once the pending replies are in.
 */
  int s;
  struct selectentry *se;

  s = tmr_arg(timer)->ta_int;
  if (s < 0 || s >= MAXSELECTS) return;	/* Entry does not exist */

  se = &selecttab[s];
  if (se->requestor == NULL) return;
  if (se->expiry <= 0) return;	/* Strange, did we even ask for a timeout? */
  se->expiry = 0;

  if (!is_deferred(se))
	select_return(se);
  else
	se->block = 0;	/* timer triggered "too soon", treat as nonblocking */
}
/*===========================================================================*
endpoint-aware conversion of servers. 'who', indicating caller number in pm and fs and some other servers, has been removed in favour of 'who_e' (endpoint) and 'who_p' (proc nr.). In both PM and FS, isokendpt() converts endpoints to process slot numbers, returning OK if it was a valid and consistent endpoint number. okendpt() does the same but panic()s if it doesn't succeed. (In PM, this is pm_isok..) pm and fs keep their own records of process endpoints in their proc tables, which are needed to make kernel calls about those processes. message field names have changed. fs drivers are endpoints. fs now doesn't try to get out of driver deadlock, as the protocol isn't supposed to let that happen any more. (A warning is printed if ELOCKED is detected though.) fproc[].fp_task (indicating which driver the process is suspended on) became an int. PM and FS now get endpoint numbers of initial boot processes from the kernel. These happen to be the same as the old proc numbers, to let user processes reach them with the old numbers, but FS and PM don't know that. All new processes after INIT, even after the generation number wraps around, get endpoint numbers with generation 1 and higher, so the first instances of the boot processes are the only processes ever to have endpoint numbers in the old proc number range. More return code checks of sys_* functions have been added. IS has become endpoint-aware. Ditched the 'text' and 'data' fields in the kernel dump (which show locations, not sizes, so aren't terribly useful) in favour of the endpoint number. Proc number is still visible. Some other dumps (e.g. dmap, rs) show endpoint numbers now too, which changed their formatting. PM reading segments using rw_seg() has changed - it uses other fields in the message now instead of encoding the segment and process number and fd in the fd field. For that it uses _read_pm() and _write_pm() which do _taskcall()s directly in pm/misc.c. 
PM now sys_exit()s itself on panic(), instead of sys_abort(). RS also talks in endpoints instead of process numbers.
2006-03-03 11:20:58 +01:00
* select_unsuspend_by_endpt *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void select_unsuspend_by_endpt(endpoint_t proc_e)
{
/* Revive blocked processes when a driver has disappeared.
 *
 * All select table entries that are in use are visited. An entry whose
 * requestor itself has endpoint 'proc_e' is simply cancelled; that process is
 * expected to be exiting. For any other entry, each character device filp
 * whose select device major matches 'proc_e' according to dmap_driver_match
 * is removed from the entry, and the entry's error is set to EIO. An entry
 * that lost at least one filp this way is completed right away, unless it is
 * still deferred.
 */
  devmajor_t major;
  int fd, s;
  struct selectentry *se;
  struct filp *f;

  for (s = 0; s < MAXSELECTS; s++) {
	int wakehim = 0;

	se = &selecttab[s];
	if (se->requestor == NULL) continue;

	if (se->requestor->fp_endpoint == proc_e) {
		assert(se->requestor->fp_flags & FP_EXITING);
		select_cancel_all(se);
		continue;
	}

	for (fd = 0; fd < se->nfds; fd++) {
		if ((f = se->filps[fd]) == NULL || !is_char_device(f))
			continue;

		assert(f->filp_char_select_dev != NO_DEV);
		major = major(f->filp_char_select_dev);
		if (dmap_driver_match(proc_e, major)) {
			se->filps[fd] = NULL;
			se->error = EIO;
			select_cancel_filp(f);
			wakehim = 1;
		}
	}

	if (wakehim && !is_deferred(se))
		select_return(se);
  }
}
2008-02-22 16:46:59 +01:00
/*===========================================================================*
* select_reply1 *
*===========================================================================*/
void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
{
/* Process the initial reply of driver 'driver_e' to a CDEV_SELECT request for
 * minor device 'minor'. A positive 'status' is a mask of SEL_* operations that
 * are ready, a negative 'status' is an error code, and zero means that nothing
 * is ready. This function MUST NOT block its calling thread.
 */
  struct dmap *drv;
  struct filp *fp;
  devmajor_t maj;
  dev_t device;

  /* Map the replying endpoint onto a device number. The major number is the
   * index of the driver's entry in the dmap table.
   */
  drv = get_dmap(driver_e);
  if (drv == NULL) return;

  maj = drv - dmap;
  device = makedev(maj, minor);

  /* A reply is only acceptable while a request is outstanding. */
  if (!drv->dmap_sel_busy) {
        printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
                __FILE__, __LINE__, maj);
        return;
  }

  /* The filp may be NULL when the requestor was unpaused in the meantime. The
   * result is ignored in that case, but other filps may still be restarted
   * further below.
   */
  fp = drv->dmap_sel_filp;
  if (fp != NULL) {
        assert(is_char_device(fp));
        assert(fp->filp_char_select_dev != NO_DEV);

        if (fp->filp_char_select_dev != device) {
                /* Should never happen; the driver may be misbehaving. For now,
                 * assume that the reply we are waiting for arrives later.
                 */
                printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
                        __FILE__, __LINE__, fp->filp_char_select_dev, device);
                return;
        }
  }

  /* The request to this driver has been answered. */
  drv->dmap_sel_busy = FALSE;
  drv->dmap_sel_filp = NULL;

  if (fp != NULL) {
        assert(fp->filp_count >= 1);
        assert(fp->filp_select_flags & FSF_BUSY);

        fp->filp_select_flags &= ~FSF_BUSY;

        /* The requested operations must be remembered when
         * - another select on this filp, possibly for other operations or
         *   through other file descriptors, is pending (FSF_UPDATE), or
         * - the select is blocking and the operations may only become ready
         *   later (FSF_BLOCKED).
         * Otherwise this filp is done selecting.
         */
        if ((fp->filp_select_flags & (FSF_UPDATE | FSF_BLOCKED)) == 0) {
                fp->filp_select_ops = 0;
        } else if (status > 0 && (fp->filp_select_flags & FSF_UPDATE) == 0) {
                /* Other operations may still be pending. */
                fp->filp_select_ops &= ~status;
        }

        /* Record the new block state. A zero status leaves it untouched. */
        if (status > 0) {
                int unblocked = 0;

                if (status & SEL_RD) unblocked |= FSF_RD_BLOCK;
                if (status & SEL_WR) unblocked |= FSF_WR_BLOCK;
                if (status & SEL_ERR) unblocked |= FSF_ERR_BLOCK;
                fp->filp_select_flags &= ~unblocked;
        } else if (status < 0) {
                /* An error always unblocks all operations. */
                fp->filp_select_flags &= ~FSF_BLOCKED;
        }

        /* Pass the result on to every select entry using this filp. */
        filp_status(fp, status);
  }

  select_restart_filps();
}
/*===========================================================================*
* select_reply2 *
*===========================================================================*/
void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
{
/* Process a secondary select reply from driver 'driver_e' for minor device
 * 'minor'. Such a reply follows a 'blocking' select request once an operation
 * has become ready (positive 'status', a mask of SEL_* bits) or has failed
 * (negative 'status'). This function MUST NOT block its calling thread.
 */
  struct dmap *drv;
  devmajor_t maj;
  dev_t device;
  int slot;

  /* A secondary reply must carry either ready operations or an error. */
  if (status == 0) {
        printf("VFS (%s:%d): weird status (%d) to report\n",
                __FILE__, __LINE__, status);
        return;
  }

  /* Map the replying endpoint onto a device number. */
  drv = get_dmap(driver_e);
  if (drv == NULL) {
        printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
                __FILE__, __LINE__, driver_e);
        return;
  }
  maj = drv - dmap;
  device = makedev(maj, minor);

  /* Visit every file descriptor that is selecting on this device. */
  for (slot = 0; slot < MAXSELECTS; slot++) {
        struct selectentry *entry = &selecttab[slot];
        int fd, matched = FALSE;

        if (entry->requestor == NULL) continue;         /* unused slot */

        for (fd = 0; fd < entry->nfds; fd++) {
                struct filp *fp = entry->filps[fd];

                if (fp == NULL || !is_char_device(fp)) continue;
                assert(fp->filp_char_select_dev != NO_DEV);
                if (fp->filp_char_select_dev != device) continue;

                matched = TRUE;

                if (status < 0) {
                        /* Error: unblock everything and report it. */
                        fp->filp_select_flags &= ~FSF_BLOCKED;
                        entry->error = status;
                        continue;
                }

                /* Operations are ready. Remove them from the request mask,
                 * unless an update of the request is pending.
                 */
                if ((fp->filp_select_flags & FSF_UPDATE) == 0)
                        fp->filp_select_ops &= ~status;

                if (status & SEL_RD)
                        fp->filp_select_flags &= ~FSF_RD_BLOCK;
                if (status & SEL_WR)
                        fp->filp_select_flags &= ~FSF_WR_BLOCK;
                if (status & SEL_ERR)
                        fp->filp_select_flags &= ~FSF_ERR_BLOCK;

                ops2tab(status, fd, entry);
        }

        /* A match does not imply that anything changed for this call: it may
         * not be interested in the operations that were reported as ready.
         * That decision is left to restart_proc.
         */
        if (matched)
                restart_proc(entry);
  }

  select_restart_filps();
}
2008-02-22 16:46:59 +01:00
/*===========================================================================*
* select_restart_filps *
*===========================================================================*/
static void select_restart_filps(void)
2008-02-22 16:46:59 +01:00
{
/* We got a result from a character driver, and now we need to check if we can
* restart deferred polling operations. This function MUST NOT block its
* calling thread.
*/
int fd, slot;
struct filp *f;
struct selectentry *se;
/* Locate filps that can be restarted */
for (slot = 0; slot < MAXSELECTS; slot++) {
se = &selecttab[slot];
if (se->requestor == NULL) continue; /* empty slot */
/* Only 'deferred' processes are eligible to restart */
if (!is_deferred(se)) continue;
/* Find filps that are not waiting for a reply, but have an updated
2012-02-13 16:28:04 +01:00
* status (i.e., another select on the same filp with possibly a
* different set of operations is to be done), and thus requires the
* select request to be sent again).
*/
for (fd = 0; fd < se->nfds; fd++) {
int r, wantops, ops;
if ((f = se->filps[fd]) == NULL) continue;
if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
continue; /* initial reply */
if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
continue; /* 'update' state */
/* This function is suitable only for character devices. In
* particular, checking pipes the same way would introduce a
* serious locking problem.
*/
assert(is_char_device(f));
wantops = ops = f->filp_select_ops;
r = select_request_char(f, &wantops, se->block, se->requestor);
if (r != OK && r != SUSPEND) {
se->error = r;
restart_proc(se);
break; /* Error or bogus return code; abort */
}
if (wantops & ops) ops2tab(wantops, fd, se);
2008-02-22 16:46:59 +01:00
}
}
2008-02-22 16:46:59 +01:00
}
/*===========================================================================*
* filp_status *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void filp_status(struct filp *f, int status)
{
/* Report the select result 'status' of filp 'f' to every select entry that
 * refers to this filp. A negative status is stored as the entry's error; any
 * other status is recorded as ready operations through ops2tab. Each entry
 * that uses the filp is then given a chance to return to its requestor. This
 * function MUST NOT block its calling thread.
 */
  int fd, slot, found;
  struct selectentry *se;

  for (slot = 0; slot < MAXSELECTS; slot++) {
        se = &selecttab[slot];
        if (se->requestor == NULL) continue;    /* empty slot */

        found = FALSE;
        for (fd = 0; fd < se->nfds; fd++) {
                /* The same filp may be present under several descriptors. */
                if (se->filps[fd] != f) continue;

                if (status < 0)
                        se->error = status;
                else
                        ops2tab(status, fd, se);
                found = TRUE;
        }

        if (found)
                restart_proc(se);
  }
}
/*===========================================================================*
* restart_proc *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void restart_proc(se)
struct selectentry *se;
2008-02-22 16:46:59 +01:00
{
/* Tell process about select results (if any) unless there are still results
* pending. This function MUST NOT block its calling thread.
*/
if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
select_return(se);
2008-02-22 16:46:59 +01:00
}
/*===========================================================================*
* wipe_select *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void wipe_select(struct selectentry *se)
{
/* Reset the per-call state of select entry 'se': the descriptor count, the
 * ready count, the error code, the blocking flag, the filp table and all
 * requested and ready descriptor sets.
 */

  /* Requested descriptor sets */
  FD_ZERO(&se->readfds);
  FD_ZERO(&se->writefds);
  FD_ZERO(&se->errorfds);

  /* Result descriptor sets */
  FD_ZERO(&se->ready_readfds);
  FD_ZERO(&se->ready_writefds);
  FD_ZERO(&se->ready_errorfds);

  /* Filp table and scalar bookkeeping */
  memset(se->filps, 0, sizeof(se->filps));
  se->block = 0;
  se->error = OK;
  se->nreadyfds = 0;
  se->nfds = 0;
}
2012-02-13 16:28:04 +01:00
/*===========================================================================*
* select_lock_filp *
2012-02-13 16:28:04 +01:00
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void select_lock_filp(struct filp *f, int ops)
2012-02-13 16:28:04 +01:00
{
/* Lock a filp and vnode based on which operations are requested. This function
* may block its calling thread, obviously.
*/
tll_access_t locktype;
2012-02-13 16:28:04 +01:00
locktype = VNODE_READ; /* By default */
if (ops & (SEL_WR|SEL_ERR))
/* Selecting for error or writing requires exclusive access */
locktype = VNODE_WRITE;
lock_filp(f, locktype);
}