2005-04-21 16:53:53 +02:00
|
|
|
/* This file contains the procedures that manipulate file descriptors.
|
|
|
|
*
|
|
|
|
* The entry points into this file are
|
2010-08-30 15:44:07 +02:00
|
|
|
* get_fd: look for free file descriptor and free filp slots
|
|
|
|
* get_filp: look up the filp entry for a given file descriptor
|
|
|
|
* find_filp: find a filp slot that points to a given vnode
|
|
|
|
* inval_filp: invalidate a filp and associated fd's, only let close()
|
|
|
|
* happen on it
|
2012-02-13 16:28:04 +01:00
|
|
|
* do_verify_fd: verify whether the given file descriptor is valid for
|
2010-08-30 15:44:07 +02:00
|
|
|
* the given endpoint.
|
|
|
|
* do_set_filp: marks a filp as in-flight.
|
|
|
|
* do_copy_filp: copies a filp to another endpoint.
|
|
|
|
* do_put_filp: marks a filp as not in-flight anymore.
|
2012-02-13 16:28:04 +01:00
|
|
|
* do_cancel_fd: cancel the transaction when something goes wrong for
|
2010-08-30 15:44:07 +02:00
|
|
|
* the receiver.
|
2005-04-21 16:53:53 +02:00
|
|
|
*/
|
|
|
|
|
2006-03-09 17:00:33 +01:00
|
|
|
#include <sys/select.h>
|
2010-08-30 15:44:07 +02:00
|
|
|
#include <minix/callnr.h>
|
2006-11-27 15:21:43 +01:00
|
|
|
#include <minix/u64.h>
|
Mostly bugfixes of bugs triggered by the test set.
bugfixes:
SYSTEM:
. removed
rc->p_priv->s_flags = 0;
for the priv struct shared by all user processes in get_priv(). this
should only be done once. doing a SYS_PRIV_USER in sys_privctl()
caused the flags of all user processes to be reset, so they were no
longer PREEMPTIBLE. this happened when RS executed a policy script.
(this broke test1 in the test set)
VFS/MFS:
. chown can change the mode of a file, and chmod arguments are only
part of the full file mode so the full filemode is slightly magic.
changed these calls so that the final modes are returned to VFS, so
that the vnode can be kept up-to-date.
(this broke test11 in the test set)
MFS:
. lookup() checked for sizeof(string) instead of sizeof(user_path),
truncating long path names
(caught by test 23)
. truncate functions neglected to update ctime
(this broke test16)
VFS:
. corner case of an empty filename lookup caused fields of a request
not to be filled in in the lookup functions, not making it clear
that the lookup had failed, causing messages to garbage processes,
causing strange failures.
(caught by test 30)
. trust v_size in vnode when doing reads or writes on non-special
files, truncating i/o where necessary; this is necessary for pipes,
as MFS can't tell when a pipe has been truncated without it being
told explicitly each time.
when the last reader/writer on a pipe closes, tell FS about
the new size using truncate_vn().
(this broke test 25, among others)
. permission check for chdir() had disappeared; added a
forbidden() call
(caught by test 23)
new code, shouldn't change anything:
. introduced RTS_SET, RTS_UNSET, and RTS_ISSET macros, and their
LOCK variants. These macros set and clear the p_rts_flags field,
causing a lot of duplicated logic like
old_flags = rp->p_rts_flags; /* save value of the flags */
rp->p_rts_flags &= ~NO_PRIV;
if (old_flags != 0 && rp->p_rts_flags == 0) lock_enqueue(rp);
to change into the simpler
RTS_LOCK_UNSET(rp, NO_PRIV);
so the macros take care of calling dequeue() and enqueue() (or lock_*(),
as the case may be). This makes the code a bit more readable and a
bit less fragile.
. removed return code from do_clocktick in CLOCK as it currently
never replies
. removed some debug code from VFS
. fixed grant debug message in device.c
preemptive checks, tests, changes:
. added return code checks of receive() to SYSTEM and CLOCK
. O_TRUNC should never arrive at MFS (added sanity check and removed
O_TRUNC code)
. user_path declared with PATH_MAX+1 to let it be null-terminated
. checks in MFS to see if strings passed by VFS are null-terminated
IS:
. static irq name table thrown out
2007-02-01 18:50:02 +01:00
|
|
|
#include <assert.h>
|
2012-04-16 11:04:32 +02:00
|
|
|
#include <sys/stat.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
#include "fs.h"
|
|
|
|
#include "file.h"
|
|
|
|
#include "fproc.h"
|
2006-10-25 15:40:36 +02:00
|
|
|
#include "vnode.h"
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
static filp_id_t verify_fd(endpoint_t ep, int fd);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
#if LOCK_DEBUG
|
|
|
|
/*===========================================================================*
|
|
|
|
* check_filp_locks *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void check_filp_locks_by_me(void)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
/* Check whether this thread still has filp locks held */
|
|
|
|
struct filp *f;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
|
|
|
r = mutex_trylock(&f->filp_lock);
|
|
|
|
if (r == -EDEADLK)
|
|
|
|
panic("Thread %d still holds filp lock on filp %p call_nr=%d\n",
|
2012-04-13 14:50:38 +02:00
|
|
|
mthread_self(), f, job_call_nr);
|
2012-02-13 16:28:04 +01:00
|
|
|
else if (r == 0) {
|
|
|
|
/* We just obtained the lock, release it */
|
|
|
|
mutex_unlock(&f->filp_lock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* check_filp_locks *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void check_filp_locks(void)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct filp *f;
|
|
|
|
int r, count = 0;
|
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
|
|
|
r = mutex_trylock(&f->filp_lock);
|
|
|
|
if (r == -EBUSY) {
|
|
|
|
/* Mutex is still locked */
|
|
|
|
count++;
|
|
|
|
} else if (r == 0) {
|
|
|
|
/* We just obtained a lock, don't want it */
|
|
|
|
mutex_unlock(&f->filp_lock);
|
|
|
|
} else
|
|
|
|
panic("filp_lock weird state");
|
|
|
|
}
|
|
|
|
if (count) panic("locked filps");
|
|
|
|
#if 0
|
|
|
|
else printf("check_filp_locks OK\n");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but the FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver so that it will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from deadlocking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after successfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled, there was a window in which these entries
could be overwritten. As a result, some select results were
ignored (and select() remained blocking instead of returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* do_filp_gc *
|
|
|
|
*===========================================================================*/
|
|
|
|
void *do_filp_gc(void *UNUSED(arg))
|
|
|
|
{
|
|
|
|
struct filp *f;
|
|
|
|
struct vnode *vp;
|
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
|
|
|
if (!(f->filp_state & FS_INVALIDATED)) continue;
|
2013-02-15 16:09:24 +01:00
|
|
|
|
|
|
|
if (f->filp_mode == FILP_CLOSED || f->filp_vno == NULL) {
|
|
|
|
/* File was already closed before gc could kick in */
|
|
|
|
assert(f->filp_count <= 0);
|
|
|
|
f->filp_state &= ~FS_INVALIDATED;
|
|
|
|
f->filp_count = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
assert(f->filp_vno != NULL);
|
|
|
|
vp = f->filp_vno;
|
|
|
|
|
|
|
|
/* Synchronize with worker thread that might hold a lock on the vp */
|
|
|
|
lock_vnode(vp, VNODE_OPCL);
|
|
|
|
unlock_vnode(vp);
|
|
|
|
|
|
|
|
/* If garbage collection was invoked due to a failed device open
|
|
|
|
* request, then common_open has already cleaned up and we have
|
|
|
|
* nothing to do.
|
|
|
|
*/
|
|
|
|
if (!(f->filp_state & FS_INVALIDATED)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If garbage collection was invoked due to a failed device close
|
|
|
|
* request, the close_filp has already cleaned up and we have nothing
|
|
|
|
* to do.
|
|
|
|
*/
|
|
|
|
if (f->filp_mode != FILP_CLOSED) {
|
|
|
|
assert(f->filp_count == 0);
|
|
|
|
f->filp_count = 1; /* So lock_filp and close_filp will do
|
|
|
|
* their job */
|
|
|
|
lock_filp(f, VNODE_READ);
|
|
|
|
close_filp(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
f->filp_state &= ~FS_INVALIDATED;
|
|
|
|
}
|
|
|
|
|
|
|
|
thread_cleanup(NULL);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* init_filps *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void init_filps(void)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
/* Initialize filps */
|
|
|
|
struct filp *f;
|
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
2012-07-13 18:08:06 +02:00
|
|
|
if (mutex_init(&f->filp_lock, NULL) != 0)
|
|
|
|
panic("Failed to initialize filp mutex");
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* get_fd *
|
|
|
|
*===========================================================================*/
|
2013-05-07 14:41:07 +02:00
|
|
|
int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, struct filp **fpt)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Look for a free file descriptor and a free filp slot. Fill in the mode word
|
|
|
|
* in the latter, but don't claim either one yet, since the open() or creat()
|
|
|
|
* may yet fail.
|
|
|
|
*/
|
|
|
|
|
|
|
|
register struct filp *f;
|
|
|
|
register int i;
|
|
|
|
|
|
|
|
/* Search the fproc fp_filp table for a free file descriptor. */
|
|
|
|
for (i = start; i < OPEN_MAX; i++) {
|
2013-05-07 14:41:07 +02:00
|
|
|
if (rfp->fp_filp[i] == NULL && !FD_ISSET(i, &rfp->fp_filp_inuse)) {
|
2005-04-21 16:53:53 +02:00
|
|
|
/* A file descriptor has been located. */
|
|
|
|
*k = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check to see if a file descriptor has been found. */
|
Mostly bugfixes of bugs triggered by the test set.
bugfixes:
SYSTEM:
. removed
rc->p_priv->s_flags = 0;
for the priv struct shared by all user processes in get_priv(). this
should only be done once. doing a SYS_PRIV_USER in sys_privctl()
caused the flags of all user processes to be reset, so they were no
longer PREEMPTIBLE. this happened when RS executed a policy script.
(this broke test1 in the test set)
VFS/MFS:
. chown can change the mode of a file, and chmod arguments are only
part of the full file mode so the full filemode is slightly magic.
changed these calls so that the final modes are returned to VFS, so
that the vnode can be kept up-to-date.
(this broke test11 in the test set)
MFS:
. lookup() checked for sizeof(string) instead of sizeof(user_path),
truncating long path names
(caught by test 23)
. truncate functions neglected to update ctime
(this broke test16)
VFS:
. corner case of an empty filename lookup caused fields of a request
not to be filled in in the lookup functions, not making it clear
that the lookup had failed, causing messages to garbage processes,
causing strange failures.
(caught by test 30)
. trust v_size in vnode when doing reads or writes on non-special
files, truncating i/o where necessary; this is necessary for pipes,
as MFS can't tell when a pipe has been truncated without it being
told explicitly each time.
when the last reader/writer on a pipe closes, tell FS about
the new size using truncate_vn().
(this broke test 25, among others)
. permission check for chdir() had disappeared; added a
forbidden() call
(caught by test 23)
new code, shouldn't change anything:
. introduced RTS_SET, RTS_UNSET, and RTS_ISSET macro's, and their
LOCK variants. These macros set and clear the p_rts_flags field,
causing a lot of duplicated logic like
old_flags = rp->p_rts_flags; /* save value of the flags */
rp->p_rts_flags &= ~NO_PRIV;
if (old_flags != 0 && rp->p_rts_flags == 0) lock_enqueue(rp);
to change into the simpler
RTS_LOCK_UNSET(rp, NO_PRIV);
so the macros take care of calling dequeue() and enqueue() (or lock_*()),
as the case may be). This makes the code a bit more readable and a
bit less fragile.
. removed return code from do_clocktick in CLOCK as it currently
never replies
. removed some debug code from VFS
. fixed grant debug message in device.c
preemptive checks, tests, changes:
. added return code checks of receive() to SYSTEM and CLOCK
. O_TRUNC should never arrive at MFS (added sanity check and removed
O_TRUNC code)
. user_path declared with PATH_MAX+1 to let it be null-terminated
. checks in MFS to see if strings passed by VFS are null-terminated
IS:
. static irq name table thrown out
2007-02-01 18:50:02 +01:00
|
|
|
if (i >= OPEN_MAX) return(EMFILE);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* If we don't care about a filp, return now */
|
|
|
|
if (fpt == NULL) return(OK);
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Now that a file descriptor has been found, look for a free filp slot. */
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
Mostly bugfixes of bugs triggered by the test set.
bugfixes:
SYSTEM:
. removed
rc->p_priv->s_flags = 0;
for the priv struct shared by all user processes in get_priv(). this
should only be done once. doing a SYS_PRIV_USER in sys_privctl()
caused the flags of all user processes to be reset, so they were no
longer PREEMPTIBLE. this happened when RS executed a policy script.
(this broke test1 in the test set)
VFS/MFS:
. chown can change the mode of a file, and chmod arguments are only
part of the full file mode so the full filemode is slightly magic.
changed these calls so that the final modes are returned to VFS, so
that the vnode can be kept up-to-date.
(this broke test11 in the test set)
MFS:
. lookup() checked for sizeof(string) instead of sizeof(user_path),
truncating long path names
(caught by test 23)
. truncate functions neglected to update ctime
(this broke test16)
VFS:
. corner case of an empty filename lookup caused fields of a request
not to be filled in in the lookup functions, not making it clear
that the lookup had failed, causing messages to garbage processes,
causing strange failures.
(caught by test 30)
. trust v_size in vnode when doing reads or writes on non-special
files, truncating i/o where necessary; this is necessary for pipes,
as MFS can't tell when a pipe has been truncated without it being
told explicitly each time.
when the last reader/writer on a pipe closes, tell FS about
the new size using truncate_vn().
(this broke test 25, among others)
. permission check for chdir() had disappeared; added a
forbidden() call
(caught by test 23)
new code, shouldn't change anything:
. introduced RTS_SET, RTS_UNSET, and RTS_ISSET macro's, and their
LOCK variants. These macros set and clear the p_rts_flags field,
causing a lot of duplicated logic like
old_flags = rp->p_rts_flags; /* save value of the flags */
rp->p_rts_flags &= ~NO_PRIV;
if (old_flags != 0 && rp->p_rts_flags == 0) lock_enqueue(rp);
to change into the simpler
RTS_LOCK_UNSET(rp, NO_PRIV);
so the macros take care of calling dequeue() and enqueue() (or lock_*()),
as the case may be). This makes the code a bit more readable and a
bit less fragile.
. removed return code from do_clocktick in CLOCK as it currently
never replies
. removed some debug code from VFS
. fixed grant debug message in device.c
preemptive checks, tests, changes:
. added return code checks of receive() to SYSTEM and CLOCK
. O_TRUNC should never arrive at MFS (added sanity check and removed
O_TRUNC code)
. user_path declared with PATH_MAX+1 to let it be null-terminated
. checks in MFS to see if strings passed by VFS are null-terminated
IS:
. static irq name table thrown out
2007-02-01 18:50:02 +01:00
|
|
|
assert(f->filp_count >= 0);
|
2012-02-13 16:28:04 +01:00
|
|
|
if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) {
|
2005-04-21 16:53:53 +02:00
|
|
|
f->filp_mode = bits;
|
2013-06-17 10:31:12 +02:00
|
|
|
f->filp_pos = ((u64_t)(0));
|
2005-06-17 15:41:12 +02:00
|
|
|
f->filp_selectors = 0;
|
|
|
|
f->filp_select_ops = 0;
|
|
|
|
f->filp_pipe_select_ops = 0;
|
2005-04-21 16:53:53 +02:00
|
|
|
f->filp_flags = 0;
|
2008-02-22 15:19:23 +01:00
|
|
|
f->filp_state = FS_NORMAL;
|
|
|
|
f->filp_select_flags = 0;
|
2012-02-13 16:28:04 +01:00
|
|
|
f->filp_softlock = NULL;
|
2005-04-21 16:53:53 +02:00
|
|
|
*fpt = f;
|
|
|
|
return(OK);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If control passes here, the filp table must be full. Report that back. */
|
|
|
|
return(ENFILE);
|
|
|
|
}
|
|
|
|
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* get_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
struct filp *get_filp(fild, locktype)
|
2005-04-21 16:53:53 +02:00
|
|
|
int fild; /* file descriptor */
|
2012-02-13 16:28:04 +01:00
|
|
|
tll_access_t locktype;
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
2006-05-11 16:57:23 +02:00
|
|
|
/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
return get_filp2(fp, fild, locktype);
|
2006-05-11 16:57:23 +02:00
|
|
|
}
|
|
|
|
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2006-05-11 16:57:23 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* get_filp2 *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
struct filp *get_filp2(rfp, fild, locktype)
|
2006-05-11 16:57:23 +02:00
|
|
|
register struct fproc *rfp;
|
|
|
|
int fild; /* file descriptor */
|
2012-02-13 16:28:04 +01:00
|
|
|
tll_access_t locktype;
|
2006-05-11 16:57:23 +02:00
|
|
|
{
|
2005-04-21 16:53:53 +02:00
|
|
|
/* See if 'fild' refers to a valid file descr. If so, return its filp ptr. */
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *filp;
|
2005-04-21 16:53:53 +02:00
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
filp = NULL;
|
|
|
|
if (fild < 0 || fild >= OPEN_MAX)
|
|
|
|
err_code = EBADF;
|
|
|
|
else if (rfp->fp_filp[fild] == NULL && FD_ISSET(fild, &rfp->fp_filp_inuse))
|
2007-08-15 14:53:52 +02:00
|
|
|
err_code = EIO; /* The filedes is not there, but is not closed either.
|
|
|
|
*/
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after successfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was an opportunity for these entries
to be overwritten. The effect was that some select results were
ignored (and select() remained blocking instead of returning) or do_select
tried to access filps that were not present (because thrown away by a
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
else if ((filp = rfp->fp_filp[fild]) == NULL)
|
|
|
|
err_code = EBADF;
|
|
|
|
else
|
|
|
|
lock_filp(filp, locktype); /* All is fine */
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
return(filp); /* may also be NULL */
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* find_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
struct filp *find_filp(struct vnode *vp, mode_t bits)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
Mostly bugfixes of bugs triggered by the test set.
bugfixes:
SYSTEM:
. removed
rc->p_priv->s_flags = 0;
for the priv struct shared by all user processes in get_priv(). this
should only be done once. doing a SYS_PRIV_USER in sys_privctl()
caused the flags of all user processes to be reset, so they were no
longer PREEMPTIBLE. this happened when RS executed a policy script.
(this broke test1 in the test set)
VFS/MFS:
. chown can change the mode of a file, and chmod arguments are only
part of the full file mode so the full filemode is slightly magic.
changed these calls so that the final modes are returned to VFS, so
that the vnode can be kept up-to-date.
(this broke test11 in the test set)
MFS:
. lookup() checked for sizeof(string) instead of sizeof(user_path),
truncating long path names
(caught by test 23)
. truncate functions neglected to update ctime
(this broke test16)
VFS:
. corner case of an empty filename lookup caused fields of a request
not to be filled in in the lookup functions, not making it clear
that the lookup had failed, causing messages to garbage processes,
causing strange failures.
(caught by test 30)
. trust v_size in vnode when doing reads or writes on non-special
files, truncating i/o where necessary; this is necessary for pipes,
as MFS can't tell when a pipe has been truncated without it being
told explicitly each time.
when the last reader/writer on a pipe closes, tell FS about
the new size using truncate_vn().
(this broke test 25, among others)
. permission check for chdir() had disappeared; added a
forbidden() call
(caught by test 23)
new code, shouldn't change anything:
. introduced RTS_SET, RTS_UNSET, and RTS_ISSET macro's, and their
LOCK variants. These macros set and clear the p_rts_flags field,
causing a lot of duplicated logic like
old_flags = rp->p_rts_flags; /* save value of the flags */
rp->p_rts_flags &= ~NO_PRIV;
if (old_flags != 0 && rp->p_rts_flags == 0) lock_enqueue(rp);
to change into the simpler
RTS_LOCK_UNSET(rp, NO_PRIV);
so the macros take care of calling dequeue() and enqueue() (or lock_*()),
as the case may be). This makes the code a bit more readable and a
bit less fragile.
. removed return code from do_clocktick in CLOCK as it currently
never replies
. removed some debug code from VFS
. fixed grant debug message in device.c
preemptive checks, tests, changes:
. added return code checks of receive() to SYSTEM and CLOCK
. O_TRUNC should never arrive at MFS (added sanity check and removed
O_TRUNC code)
. user_path declared with PATH_MAX+1 to let it be null-terminated
. checks in MFS to see if strings passed by VFS are null-terminated
IS:
. static irq name table thrown out
2007-02-01 18:50:02 +01:00
|
|
|
/* Find a filp slot that refers to the vnode 'vp' in a way as described
|
2005-04-21 16:53:53 +02:00
|
|
|
* by the mode bit 'bits'. Used for determining whether somebody is still
|
|
|
|
* interested in either end of a pipe. Also used when opening a FIFO to
|
|
|
|
* find partners to share a filp field with (to shared the file position).
|
|
|
|
* Like 'get_fd' it performs its job by linear search through the filp table.
|
|
|
|
*/
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *f;
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
2012-02-13 16:28:04 +01:00
|
|
|
if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) {
|
2005-04-21 16:53:53 +02:00
|
|
|
return(f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If control passes here, the filp wasn't there. Report that back. */
|
2010-05-10 15:26:00 +02:00
|
|
|
return(NULL);
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
2006-03-09 17:00:33 +01:00
|
|
|
|
|
|
|
/*===========================================================================*
|
2012-02-13 16:28:04 +01:00
|
|
|
* invalidate_filp *
|
2006-03-09 17:00:33 +01:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int invalidate_filp(struct filp *rfilp)
|
2006-03-09 17:00:33 +01:00
|
|
|
{
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
/* Invalidate filp. fp_filp_inuse is not cleared, so filp can't be reused
|
|
|
|
until it is closed first. */
|
|
|
|
|
|
|
|
int f, fd, n = 0;
|
2012-02-13 16:28:04 +01:00
|
|
|
for (f = 0; f < NR_PROCS; f++) {
|
|
|
|
if (fproc[f].fp_pid == PID_FREE) continue;
|
|
|
|
for (fd = 0; fd < OPEN_MAX; fd++) {
|
|
|
|
if(fproc[f].fp_filp[fd] && fproc[f].fp_filp[fd] == rfilp) {
|
2010-05-10 15:26:00 +02:00
|
|
|
fproc[f].fp_filp[fd] = NULL;
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
n++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
rfilp->filp_state |= FS_INVALIDATED;
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
return(n); /* Report back how often this filp has been invalidated. */
|
2006-03-09 17:00:33 +01:00
|
|
|
}
|
2010-08-30 15:44:07 +02:00
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but the FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after successfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was an opportunity for these entries
to be overwritten. The effect was that some select results were
ignored (and select() remained blocking instead of returning) or do_select
tried to access filps that were not present (because thrown away by a
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* invalidate_filp_by_char_major *
|
|
|
|
*===========================================================================*/
|
|
|
|
void invalidate_filp_by_char_major(int major)
|
|
|
|
{
|
|
|
|
struct filp *f;
|
|
|
|
|
|
|
|
for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
|
|
|
|
if (f->filp_count != 0 && f->filp_vno != NULL) {
|
|
|
|
if (major(f->filp_vno->v_sdev) == major &&
|
|
|
|
S_ISCHR(f->filp_vno->v_mode)) {
|
|
|
|
(void) invalidate_filp(f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-30 15:44:07 +02:00
|
|
|
/*===========================================================================*
|
2012-02-13 16:28:04 +01:00
|
|
|
* invalidate_filp_by_endpt *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void invalidate_filp_by_endpt(endpoint_t proc_e)
{
/* Invalidate every in-use filp whose vnode has 'proc_e' as its v_fs_e
 * endpoint.
 */
  int slot;

  for (slot = 0; slot < NR_FILPS; slot++) {
        struct filp *f = &filp[slot];

        if (f->filp_count == 0) continue;       /* slot is free */
        if (f->filp_vno == NULL) continue;      /* no vnode attached */
        if (f->filp_vno->v_fs_e != proc_e) continue;

        (void) invalidate_filp(f);
  }
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* lock_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void lock_filp(filp, locktype)
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *filp;
|
|
|
|
tll_access_t locktype;
|
|
|
|
{
|
|
|
|
struct fproc *org_fp;
|
|
|
|
struct worker_thread *org_self;
|
|
|
|
struct vnode *vp;
|
|
|
|
|
|
|
|
assert(filp->filp_count > 0);
|
|
|
|
vp = filp->filp_vno;
|
|
|
|
assert(vp != NULL);
|
|
|
|
|
|
|
|
/* Lock vnode only if we haven't already locked it. If already locked by us,
|
|
|
|
* we're allowed to have one additional 'soft' lock. */
|
|
|
|
if (tll_locked_by_me(&vp->v_lock)) {
|
|
|
|
assert(filp->filp_softlock == NULL);
|
|
|
|
filp->filp_softlock = fp;
|
|
|
|
} else {
|
VFS: fix locking bugs
.sync and fsync used unnecessarily restrictive locking type
.fsync violated locking order by obtaining a vmnt lock after a filp lock
.fsync contained a TOCTOU bug
.new_node violated locking rules (didn't upgrade lock upon file creation)
.do_pipe used unnecessarily restrictive locking type
.always lock pipes exclusively; even a read operation might require to do
a write on a vnode object (update pipe size)
.when opening a file with O_TRUNC, upgrade vnode lock when truncating
.utime used unnecessarily restrictive locking type
.path parsing:
.always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to
VMNT_READ if that was what was actually requested. This prevents the
following deadlock scenario:
thread A:
lock_vmnt(vmp, TLL_READSER);
lock_vnode(vp, TLL_READSER);
upgrade_vmnt_lock(vmp, TLL_WRITE);
thread B:
lock_vmnt(vmp, TLL_READ);
lock_vnode(vp, TLL_READSER);
thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in
lock_vnode. This happens when, for example, thread A tries create a
new node (open.c:new_node) and thread B tries to do eat_path to
change dir (stadir.c:do_chdir). When the path is being resolved, a
vnode is always locked with VNODE_OPCL (TLL_READSER) and then
downgraded to VNODE_READ if read-only is actually requested. Thread
A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows
VMNT_READ locks. Thread B can't acquire a lock on the vnode because
thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE
(TLL_WRITE) because thread B has a VMNT_READ lock on it.
By serializing vmnt locks during path parsing, thread B can only
acquire a lock on vmp when thread A has completely finished its
operation.
2012-11-30 13:49:53 +01:00
|
|
|
/* We have to make an exception for vnodes belonging to pipes. Even
|
|
|
|
* read(2) operations on pipes change the vnode and therefore require
|
|
|
|
* exclusive access.
|
|
|
|
*/
|
|
|
|
if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
|
|
|
|
locktype = VNODE_WRITE;
|
2012-02-13 16:28:04 +01:00
|
|
|
lock_vnode(vp, locktype);
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(vp->v_ref_count > 0); /* vnode still in use? */
|
|
|
|
assert(filp->filp_vno == vp); /* vnode still what we think it is? */
|
|
|
|
|
|
|
|
/* First try to get filp lock right off the bat */
|
|
|
|
if (mutex_trylock(&filp->filp_lock) != 0) {
|
|
|
|
|
|
|
|
/* Already in use, let's wait for our turn */
|
|
|
|
org_fp = fp;
|
|
|
|
org_self = self;
|
|
|
|
|
|
|
|
if (mutex_lock(&filp->filp_lock) != 0)
|
|
|
|
panic("unable to obtain lock on filp");
|
|
|
|
|
|
|
|
fp = org_fp;
|
|
|
|
self = org_self;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* unlock_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void unlock_filp(filp)
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *filp;
|
|
|
|
{
|
|
|
|
/* If this filp holds a soft lock on the vnode, we must be the owner */
|
|
|
|
if (filp->filp_softlock != NULL)
|
|
|
|
assert(filp->filp_softlock == fp);
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (filp->filp_count > 0 || filp->filp_state & FS_INVALIDATED) {
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Only unlock vnode if filp is still in use */
|
|
|
|
|
|
|
|
/* and if we don't hold a soft lock */
|
|
|
|
if (filp->filp_softlock == NULL) {
|
|
|
|
assert(tll_islocked(&(filp->filp_vno->v_lock)));
|
|
|
|
unlock_vnode(filp->filp_vno);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
filp->filp_softlock = NULL;
|
|
|
|
if (mutex_unlock(&filp->filp_lock) != 0)
|
|
|
|
panic("unable to release lock on filp");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* unlock_filps *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void unlock_filps(filp1, filp2)
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *filp1;
|
|
|
|
struct filp *filp2;
|
|
|
|
{
|
|
|
|
/* Unlock two filps that are tied to the same vnode. As a thread can lock a
|
|
|
|
* vnode only once, unlocking the vnode twice would result in an error. */
|
|
|
|
|
|
|
|
/* No NULL pointers and not equal */
|
|
|
|
assert(filp1);
|
|
|
|
assert(filp2);
|
|
|
|
assert(filp1 != filp2);
|
|
|
|
|
|
|
|
/* Must be tied to the same vnode and not NULL */
|
|
|
|
assert(filp1->filp_vno == filp2->filp_vno);
|
|
|
|
assert(filp1->filp_vno != NULL);
|
|
|
|
|
|
|
|
if (filp1->filp_count > 0 && filp2->filp_count > 0) {
|
|
|
|
/* Only unlock vnode if filps are still in use */
|
|
|
|
unlock_vnode(filp1->filp_vno);
|
|
|
|
}
|
|
|
|
|
|
|
|
filp1->filp_softlock = NULL;
|
|
|
|
filp2->filp_softlock = NULL;
|
|
|
|
if (mutex_unlock(&filp2->filp_lock) != 0)
|
|
|
|
panic("unable to release filp lock on filp2");
|
|
|
|
if (mutex_unlock(&filp1->filp_lock) != 0)
|
|
|
|
panic("unable to release filp lock on filp1");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* verify_fd *
|
2010-08-30 15:44:07 +02:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static filp_id_t verify_fd(ep, fd)
|
2010-08-30 15:44:07 +02:00
|
|
|
endpoint_t ep;
|
|
|
|
int fd;
|
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Verify whether the file descriptor 'fd' is valid for the endpoint 'ep'. When
|
|
|
|
* the file descriptor is valid, verify_fd returns a pointer to that filp, else
|
|
|
|
* it returns NULL.
|
|
|
|
*/
|
|
|
|
int slot;
|
|
|
|
struct filp *rfilp;
|
2010-08-30 15:44:07 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (isokendpt(ep, &slot) != OK)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
rfilp = get_filp2(&fproc[slot], fd, VNODE_READ);
|
|
|
|
|
|
|
|
return(rfilp);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_verify_fd *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
int do_verify_fd(message *m_out)
{
/* Request handler: check whether the descriptor in the request (COUNT) is
 * valid for the endpoint in the request (USER_ENDPT). The resulting filp
 * pointer (possibly NULL) is stored in the ADDRESS field of the reply.
 * Returns OK for a valid descriptor and EINVAL otherwise.
 */
  endpoint_t proc_e = job_m_in.USER_ENDPT;
  int fd = job_m_in.COUNT;
  struct filp *rfilp;

  rfilp = (struct filp *) verify_fd(proc_e, fd);
  m_out->ADDRESS = (void *) rfilp;

  if (rfilp == NULL)
        return EINVAL;

  /* The lookup handed the filp back locked; we only needed the pointer. */
  unlock_filp(rfilp);
  return OK;
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* set_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int set_filp(sfilp)
|
2010-08-30 15:44:07 +02:00
|
|
|
filp_id_t sfilp;
|
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
if (sfilp == NULL) return(EINVAL);
|
|
|
|
|
|
|
|
lock_filp(sfilp, VNODE_READ);
|
|
|
|
sfilp->filp_count++;
|
|
|
|
unlock_filp(sfilp);
|
|
|
|
|
|
|
|
return(OK);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_set_filp *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
int do_set_filp(message *UNUSED(m_out))
{
/* Request handler: pass the filp identifier found in the ADDRESS field of
 * the request to set_filp() and return its result. The reply message is not
 * used.
 */
  return set_filp((filp_id_t) job_m_in.ADDRESS);
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* copy_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int copy_filp(to_ep, cfilp)
|
2010-08-30 15:44:07 +02:00
|
|
|
endpoint_t to_ep;
|
|
|
|
filp_id_t cfilp;
|
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
int fd;
|
|
|
|
int slot;
|
|
|
|
struct fproc *rfp;
|
2010-08-30 15:44:07 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (isokendpt(to_ep, &slot) != OK) return(EINVAL);
|
|
|
|
rfp = &fproc[slot];
|
2010-08-30 15:44:07 +02:00
|
|
|
|
|
|
|
/* Find an open slot in fp_filp */
|
2012-02-13 16:28:04 +01:00
|
|
|
for (fd = 0; fd < OPEN_MAX; fd++) {
|
|
|
|
if (rfp->fp_filp[fd] == NULL &&
|
|
|
|
!FD_ISSET(fd, &rfp->fp_filp_inuse)) {
|
2010-08-30 15:44:07 +02:00
|
|
|
|
|
|
|
/* Found a free slot, add descriptor */
|
2012-02-13 16:28:04 +01:00
|
|
|
FD_SET(fd, &rfp->fp_filp_inuse);
|
|
|
|
rfp->fp_filp[fd] = cfilp;
|
|
|
|
rfp->fp_filp[fd]->filp_count++;
|
|
|
|
return(fd);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* File descriptor table is full */
|
|
|
|
return(EMFILE);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_copy_filp *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
int do_copy_filp(message *UNUSED(m_out))
|
2010-08-30 15:44:07 +02:00
|
|
|
{
|
2012-04-13 14:50:38 +02:00
|
|
|
endpoint_t proc_e;
|
|
|
|
filp_id_t f;
|
|
|
|
|
|
|
|
proc_e = job_m_in.USER_ENDPT;
|
|
|
|
f = (filp_id_t) job_m_in.ADDRESS;
|
|
|
|
|
|
|
|
return copy_filp(proc_e, f);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* put_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int put_filp(pfilp)
|
2010-08-30 15:44:07 +02:00
|
|
|
filp_id_t pfilp;
|
|
|
|
{
|
|
|
|
if (pfilp == NULL) {
|
|
|
|
return EINVAL;
|
|
|
|
} else {
|
2012-02-13 16:28:04 +01:00
|
|
|
lock_filp(pfilp, VNODE_OPCL);
|
2010-08-30 15:44:07 +02:00
|
|
|
close_filp(pfilp);
|
2012-02-13 16:28:04 +01:00
|
|
|
return(OK);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_put_filp *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
int do_put_filp(message *UNUSED(m_out))
|
2010-08-30 15:44:07 +02:00
|
|
|
{
|
2012-04-13 14:50:38 +02:00
|
|
|
filp_id_t f;
|
|
|
|
f = (filp_id_t) job_m_in.ADDRESS;
|
|
|
|
return put_filp(f);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2012-02-13 16:28:04 +01:00
|
|
|
* cancel_fd *
|
2010-08-30 15:44:07 +02:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int cancel_fd(ep, fd)
|
2010-08-30 15:44:07 +02:00
|
|
|
endpoint_t ep;
|
|
|
|
int fd;
|
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
int slot;
|
|
|
|
struct fproc *rfp;
|
|
|
|
struct filp *rfilp;
|
2010-08-30 15:44:07 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (isokendpt(ep, &slot) != OK) return(EINVAL);
|
|
|
|
rfp = &fproc[slot];
|
2010-08-30 15:44:07 +02:00
|
|
|
|
|
|
|
/* Check that the input 'fd' is valid */
|
2012-02-13 16:28:04 +01:00
|
|
|
rfilp = (struct filp *) verify_fd(ep, fd);
|
|
|
|
if (rfilp != NULL) {
|
2010-08-30 15:44:07 +02:00
|
|
|
/* Found a valid descriptor, remove it */
|
2012-02-13 16:28:04 +01:00
|
|
|
FD_CLR(fd, &rfp->fp_filp_inuse);
|
|
|
|
if (rfp->fp_filp[fd]->filp_count == 0) {
|
|
|
|
unlock_filp(rfilp);
|
|
|
|
printf("VFS: filp_count for slot %d fd %d already zero", slot,
|
|
|
|
fd);
|
|
|
|
return(EINVAL);
|
|
|
|
}
|
|
|
|
rfp->fp_filp[fd]->filp_count--;
|
|
|
|
rfp->fp_filp[fd] = NULL;
|
|
|
|
unlock_filp(rfilp);
|
|
|
|
return(fd);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* File descriptor is not valid for the endpoint. */
|
2012-02-13 16:28:04 +01:00
|
|
|
return(EINVAL);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_cancel_fd *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
int do_cancel_fd(message *UNUSED(m_out))
|
2010-08-30 15:44:07 +02:00
|
|
|
{
|
2012-04-13 14:50:38 +02:00
|
|
|
endpoint_t proc_e;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
proc_e = job_m_in.USER_ENDPT;
|
|
|
|
fd = job_m_in.COUNT;
|
|
|
|
|
|
|
|
return cancel_fd(proc_e, fd);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* close_filp *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void close_filp(f)
|
2012-02-13 16:28:04 +01:00
|
|
|
struct filp *f;
|
2010-08-30 15:44:07 +02:00
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Close a file. Will also unlock filp when done */
|
|
|
|
|
2012-04-25 14:44:42 +02:00
|
|
|
int rw;
|
2010-08-30 15:44:07 +02:00
|
|
|
dev_t dev;
|
|
|
|
struct vnode *vp;
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Must be locked */
|
|
|
|
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
|
|
|
|
assert(tll_islocked(&f->filp_vno->v_lock));
|
|
|
|
|
|
|
|
vp = f->filp_vno;
|
|
|
|
|
|
|
|
if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
|
2010-08-30 15:44:07 +02:00
|
|
|
/* Check to see if the file is special. */
|
2012-04-25 14:44:42 +02:00
|
|
|
if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode)) {
|
2010-08-30 15:44:07 +02:00
|
|
|
dev = (dev_t) vp->v_sdev;
|
2012-04-25 14:44:42 +02:00
|
|
|
if (S_ISBLK(vp->v_mode)) {
|
2012-02-13 16:28:04 +01:00
|
|
|
lock_bsf();
|
2010-08-30 15:44:07 +02:00
|
|
|
if (vp->v_bfs_e == ROOT_FS_E) {
|
|
|
|
/* Invalidate the cache unless the special is
|
|
|
|
* mounted. Assume that the root filesystem's
|
|
|
|
* is open only for fsck.
|
2012-02-13 16:28:04 +01:00
|
|
|
*/
|
|
|
|
req_flush(vp->v_bfs_e, dev);
|
|
|
|
}
|
|
|
|
unlock_bsf();
|
Split block/character protocols and libdriver
This patch separates the character and block driver communication
protocols. The old character protocol remains the same, but a new
block protocol is introduced. The libdriver library is replaced by
two new libraries: libchardriver and libblockdriver. Their exposed
API, and drivers that use them, have been updated accordingly.
Together, libbdev and libblockdriver now completely abstract away
the message format used by the block protocol. As the memory driver
is both a character and a block device driver, it now implements its
own message loop.
The most important semantic change made to the block protocol is that
it is no longer possible to return both partial results and an error
for a single transfer. This simplifies the interaction between the
caller and the driver, as the I/O vector no longer needs to be copied
back. Also, drivers are now no longer supposed to decide based on the
layout of the I/O vector when a transfer should be cut short. Put
simply, transfers are now supposed to either succeed completely, or
result in an error.
After this patch, the state of the various pieces is as follows:
- block protocol: stable
- libbdev API: stable for synchronous communication
- libblockdriver API: needs slight revision (the drvlib/partition API
in particular; the threading API will also change shortly)
- character protocol: needs cleanup
- libchardriver API: needs cleanup accordingly
- driver restarts: largely unsupported until endpoint changes are
reintroduced
As a side effect, this patch eliminates several bugs, hacks, and gcc
-Wall and -W warnings all over the place. It probably introduces a
few new ones, too.
Update warning: this patch changes the protocol between MFS and disk
drivers, so in order to use old/new images, the MFS from the ramdisk
must be used to mount all file systems.
2011-11-22 13:27:53 +01:00
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
/* Attempt to close only when feasible */
|
|
|
|
if (!(f->filp_state & FS_INVALIDATED)) {
|
|
|
|
(void) bdev_close(dev); /* Ignore errors */
|
|
|
|
}
|
2012-04-25 14:44:42 +02:00
|
|
|
} else {
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
/* Attempt to close only when feasible */
|
|
|
|
if (!(f->filp_state & FS_INVALIDATED)) {
|
|
|
|
(void) dev_close(dev, f-filp);/*Ignore errors*/
|
|
|
|
}
|
2012-04-25 14:44:42 +02:00
|
|
|
}
|
2010-08-30 15:44:07 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
f->filp_mode = FILP_CLOSED;
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the inode being closed is a pipe, release everyone hanging on it. */
|
2012-04-16 11:04:32 +02:00
|
|
|
if (S_ISFIFO(vp->v_mode)) {
|
2012-02-13 16:28:04 +01:00
|
|
|
rw = (f->filp_mode & R_BIT ? WRITE : READ);
|
2012-04-23 15:33:43 +02:00
|
|
|
release(vp, rw, susp_count);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
f->filp_count--; /* If filp got invalidated at device closure, the
|
|
|
|
* count might've become negative now */
|
|
|
|
if (f->filp_count == 0 ||
|
|
|
|
(f->filp_count < 0 && f->filp_state & FS_INVALIDATED)) {
|
2012-04-16 11:04:32 +02:00
|
|
|
if (S_ISFIFO(vp->v_mode)) {
|
2010-08-30 15:44:07 +02:00
|
|
|
/* Last reader or writer is going. Tell PFS about latest
|
|
|
|
* pipe size.
|
|
|
|
*/
|
|
|
|
truncate_vnode(vp, vp->v_size);
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
unlock_vnode(f->filp_vno);
|
|
|
|
put_vnode(f->filp_vno);
|
2012-04-13 11:08:39 +02:00
|
|
|
f->filp_vno = NULL;
|
|
|
|
f->filp_mode = FILP_CLOSED;
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
f->filp_count = 0;
|
2012-02-13 16:28:04 +01:00
|
|
|
} else if (f->filp_count < 0) {
|
2013-08-26 18:43:05 +02:00
|
|
|
panic("VFS: invalid filp count: %d ino %d/%u",
|
|
|
|
(int) f->filp_count, (int) vp->v_dev, (unsigned int) vp->v_inode_nr);
|
2012-02-13 16:28:04 +01:00
|
|
|
} else {
|
|
|
|
unlock_vnode(f->filp_vno);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
mutex_unlock(&f->filp_lock);
|
2010-08-30 15:44:07 +02:00
|
|
|
}
|