2011-08-17 15:23:45 +02:00
|
|
|
#include "fs.h"
|
|
|
|
#include <assert.h>
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
static void worker_get_work(void);
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *worker_main(void *arg);
|
2013-08-30 14:00:50 +02:00
|
|
|
static void worker_sleep(void);
|
2012-03-25 20:25:53 +02:00
|
|
|
static void worker_wake(struct worker_thread *worker);
|
|
|
|
static mthread_attr_t tattr;
|
2011-08-17 15:23:45 +02:00
|
|
|
|
|
|
|
#ifdef MKCOVERAGE
|
2013-05-28 21:29:08 +02:00
|
|
|
# define TH_STACKSIZE (40 * 1024)
|
2011-08-17 15:23:45 +02:00
|
|
|
#else
|
2013-05-28 21:29:08 +02:00
|
|
|
# define TH_STACKSIZE (28 * 1024)
|
2011-08-17 15:23:45 +02:00
|
|
|
#endif
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
#define ASSERTW(w) assert((w) >= &workers[0] && (w) < &workers[NR_WTHREADS])
|
2011-08-17 15:23:45 +02:00
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_init *
|
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
void worker_init(void)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
|
|
|
/* Initialize worker thread */
|
2013-08-30 14:00:50 +02:00
|
|
|
struct worker_thread *wp;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
threads_init();
|
|
|
|
if (mthread_attr_init(&tattr) != 0)
|
|
|
|
panic("failed to initialize attribute");
|
|
|
|
if (mthread_attr_setstacksize(&tattr, TH_STACKSIZE) != 0)
|
|
|
|
panic("couldn't set default thread stack size");
|
|
|
|
if (mthread_attr_setdetachstate(&tattr, MTHREAD_CREATE_DETACHED) != 0)
|
|
|
|
panic("couldn't set default thread detach state");
|
|
|
|
pending = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < NR_WTHREADS; i++) {
|
|
|
|
wp = &workers[i];
|
|
|
|
|
|
|
|
wp->w_fp = NULL; /* Mark not in use */
|
|
|
|
wp->w_next = NULL;
|
|
|
|
if (mutex_init(&wp->w_event_mutex, NULL) != 0)
|
|
|
|
panic("failed to initialize mutex");
|
|
|
|
if (cond_init(&wp->w_event, NULL) != 0)
|
|
|
|
panic("failed to initialize conditional variable");
|
|
|
|
if (mthread_create(&wp->w_tid, &tattr, worker_main, (void *) wp) != 0)
|
|
|
|
panic("unable to start thread");
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Let all threads get ready to accept work. */
|
|
|
|
yield_all();
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2013-08-30 14:00:50 +02:00
|
|
|
* worker_get_work *
|
2011-08-17 15:23:45 +02:00
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
static void worker_get_work(void)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
|
|
|
/* Find new work to do. Work can be 'queued', 'pending', or absent. In the
|
2013-08-30 14:00:50 +02:00
|
|
|
* latter case wait for new work to come in.
|
|
|
|
*/
|
2011-08-17 15:23:45 +02:00
|
|
|
struct fproc *rfp;
|
|
|
|
|
|
|
|
/* Do we have queued work to do? */
|
2013-08-30 14:00:50 +02:00
|
|
|
if (pending > 0) {
|
2011-08-17 15:23:45 +02:00
|
|
|
/* Find pending work */
|
|
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
|
|
|
|
if (rfp->fp_flags & FP_PENDING) {
|
2013-08-30 14:00:50 +02:00
|
|
|
self->w_fp = rfp;
|
|
|
|
rfp->fp_worker = self;
|
2011-08-17 15:23:45 +02:00
|
|
|
rfp->fp_flags &= ~FP_PENDING; /* No longer pending */
|
|
|
|
pending--;
|
|
|
|
assert(pending >= 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
panic("Pending work inconsistency");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wait for work to come to us */
|
2013-08-30 14:00:50 +02:00
|
|
|
worker_sleep();
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2013-08-30 14:00:50 +02:00
|
|
|
* worker_available *
|
2011-08-17 15:23:45 +02:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int worker_available(void)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
|
|
|
int busy, i;
|
|
|
|
|
|
|
|
busy = 0;
|
|
|
|
for (i = 0; i < NR_WTHREADS; i++) {
|
2013-08-30 14:00:50 +02:00
|
|
|
if (workers[i].w_fp != NULL)
|
2011-08-17 15:23:45 +02:00
|
|
|
busy++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return(NR_WTHREADS - busy);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_main *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *worker_main(void *arg)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
|
|
|
/* Worker thread main loop */
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
self = (struct worker_thread *) arg;
|
|
|
|
ASSERTW(self);
|
2011-08-17 15:23:45 +02:00
|
|
|
|
|
|
|
while(TRUE) {
|
2013-08-30 14:00:50 +02:00
|
|
|
worker_get_work();
|
|
|
|
|
|
|
|
fp = self->w_fp;
|
|
|
|
assert(fp->fp_worker == self);
|
|
|
|
|
|
|
|
/* Lock the process. */
|
|
|
|
lock_proc(fp);
|
|
|
|
|
|
|
|
/* The following two blocks could be run in a loop until both the
|
|
|
|
* conditions are no longer met, but it is currently impossible that
|
|
|
|
* more normal work is present after postponed PM work has been done.
|
|
|
|
*/
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Perform normal work, if any. */
|
|
|
|
if (fp->fp_func != NULL) {
|
|
|
|
self->w_msg = fp->fp_msg;
|
|
|
|
err_code = OK;
|
|
|
|
|
|
|
|
fp->fp_func();
|
|
|
|
|
|
|
|
fp->fp_func = NULL; /* deliberately unset AFTER the call */
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Perform postponed PM work, if any. */
|
|
|
|
if (fp->fp_flags & FP_PM_WORK) {
|
|
|
|
self->w_msg = fp->fp_pm_msg;
|
|
|
|
|
|
|
|
service_pm_postponed();
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
fp->fp_flags &= ~FP_PM_WORK;
|
2012-04-13 14:50:38 +02:00
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Perform cleanup actions. */
|
|
|
|
thread_cleanup();
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
unlock_proc(fp);
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
fp->fp_worker = NULL;
|
|
|
|
self->w_fp = NULL;
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
return(NULL); /* Unreachable */
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2013-08-30 14:00:50 +02:00
|
|
|
* worker_can_start *
|
2011-08-17 15:23:45 +02:00
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
int worker_can_start(struct fproc *rfp)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Return whether normal (non-PM) work can be started for the given process.
|
|
|
|
* This function is used to serialize invocation of "special" procedures, and
|
|
|
|
* not entirely safe for other cases, as explained in the comments below.
|
|
|
|
*/
|
|
|
|
int is_pending, is_active, has_normal_work, has_pm_work;
|
|
|
|
|
|
|
|
is_pending = (rfp->fp_flags & FP_PENDING);
|
|
|
|
is_active = (rfp->fp_worker != NULL);
|
|
|
|
has_normal_work = (rfp->fp_func != NULL);
|
|
|
|
has_pm_work = (rfp->fp_flags & FP_PM_WORK);
|
|
|
|
|
|
|
|
/* If there is no work scheduled for the process, we can start work. */
|
|
|
|
if (!is_pending && !is_active) return TRUE;
|
|
|
|
|
|
|
|
/* If there is already normal work scheduled for the process, we cannot add
|
|
|
|
* more, since we support only one normal job per process.
|
|
|
|
*/
|
|
|
|
if (has_normal_work) return FALSE;
|
|
|
|
|
|
|
|
/* If this process has pending PM work but no normal work, we can add the
|
|
|
|
* normal work for execution before the worker will start.
|
|
|
|
*/
|
|
|
|
if (is_pending) return TRUE;
|
|
|
|
|
|
|
|
/* However, if a worker is active for PM work, we cannot add normal work
|
|
|
|
* either, because the work will not be considered. For this reason, we can
|
|
|
|
* not use this function for processes that can possibly get postponed PM
|
|
|
|
* work. It is still safe for core system processes, though.
|
|
|
|
*/
|
|
|
|
return FALSE;
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2013-08-30 14:00:50 +02:00
|
|
|
* worker_try_activate *
|
2011-08-17 15:23:45 +02:00
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
static void worker_try_activate(struct fproc *rfp, int use_spare)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
2013-08-30 14:00:50 +02:00
|
|
|
/* See if we can wake up a thread to do the work scheduled for the given
|
|
|
|
* process. If not, mark the process as having pending work for later.
|
|
|
|
*/
|
|
|
|
int i, available, needed;
|
2011-08-17 15:23:45 +02:00
|
|
|
struct worker_thread *worker;
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Use the last available thread only if requested. Otherwise, leave at least
|
|
|
|
* one spare thread for deadlock resolution.
|
|
|
|
*/
|
|
|
|
needed = use_spare ? 1 : 2;
|
2012-02-09 15:24:28 +01:00
|
|
|
|
2011-08-17 15:23:45 +02:00
|
|
|
worker = NULL;
|
2013-08-30 14:00:50 +02:00
|
|
|
for (i = available = 0; i < NR_WTHREADS; i++) {
|
|
|
|
if (workers[i].w_fp == NULL) {
|
|
|
|
if (worker == NULL)
|
|
|
|
worker = &workers[i];
|
|
|
|
if (++available >= needed)
|
|
|
|
break;
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
if (available >= needed) {
|
|
|
|
assert(worker != NULL);
|
|
|
|
rfp->fp_worker = worker;
|
|
|
|
worker->w_fp = rfp;
|
2011-08-17 15:23:45 +02:00
|
|
|
worker_wake(worker);
|
2013-08-30 14:00:50 +02:00
|
|
|
} else {
|
|
|
|
rfp->fp_flags |= FP_PENDING;
|
|
|
|
pending++;
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
2013-08-30 14:00:50 +02:00
|
|
|
}
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_start *
|
|
|
|
*===========================================================================*/
|
|
|
|
void worker_start(struct fproc *rfp, void (*func)(void), message *m_ptr,
	int use_spare)
{
/* Schedule work for process 'rfp', to be executed by a worker thread. A
 * non-NULL 'func' denotes normal work: the function is stored and called by
 * the worker. A NULL 'func' denotes postponed PM work, which the worker
 * handles through service_pm_postponed. In both cases '*m_ptr' is copied and
 * serves as the input message. If 'use_spare' is set, the last spare
 * (deadlock-resolving) thread may be used to run the work immediately.
 */
  int is_pm_work, is_pending, is_active, has_normal_work, has_pm_work;
  int already_scheduled;

  assert(rfp != NULL);

  is_pm_work = (func == NULL);
  is_pending = (rfp->fp_flags & FP_PENDING);
  is_active = (rfp->fp_worker != NULL);
  has_normal_work = (rfp->fp_func != NULL);
  has_pm_work = (rfp->fp_flags & FP_PM_WORK);
  already_scheduled = (is_pending || is_active);

  /* Sanity checks. If any of these trigger, someone messed up badly! */
  if (!already_scheduled) {
	/* Leftover work without a pending flag or worker: a cleanup step
	 * must have been forgotten somewhere.
	 */
	if (has_normal_work || has_pm_work)
		panic("worker administration error");
  } else {
	if (is_pending && is_active)
		panic("work cannot be both pending and active");

	if (is_pm_work) {
		/* PM will not send more than one job per process at once. */
		if (has_pm_work)
			panic("got two calls from PM (%x, %x)",
				rfp->fp_pm_msg.m_type, m_ptr->m_type);
	} else {
		/* The process cannot make more than one call at once. */
		if (has_normal_work)
			panic("process has two calls (%x, %x)",
				rfp->fp_msg.m_type, m_ptr->m_type);
	}

	/* Despite PM's sys_delay_stop() system, it is possible that normal
	 * work (in particular, do_pending_pipe) arrives after postponed PM
	 * work has been scheduled for execution, so we don't check for that.
	 */
#if 0
	printf("VFS: adding %s work to %s thread\n",
		is_pm_work ? "PM" : "normal",
		is_pending ? "pending" : "active");
#endif
  }

  /* Save the work to be performed. */
  if (is_pm_work) {
	rfp->fp_pm_msg = *m_ptr;
	rfp->fp_flags |= FP_PM_WORK;
  } else {
	rfp->fp_msg = *m_ptr;
	rfp->fp_func = func;
  }

  /* If we have not only added to existing work, go look for a free thread.
   * Note that we won't be using the spare thread for normal work if there is
   * already PM work pending, but that situation will never occur in practice.
   */
  if (!already_scheduled)
	worker_try_activate(rfp, use_spare);
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_sleep *
|
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
static void worker_sleep(void)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
2013-08-30 14:00:50 +02:00
|
|
|
struct worker_thread *worker = self;
|
2011-08-17 15:23:45 +02:00
|
|
|
ASSERTW(worker);
|
2011-12-09 15:46:10 +01:00
|
|
|
if (mutex_lock(&worker->w_event_mutex) != 0)
|
|
|
|
panic("unable to lock event mutex");
|
|
|
|
if (cond_wait(&worker->w_event, &worker->w_event_mutex) != 0)
|
|
|
|
panic("could not wait on conditional variable");
|
|
|
|
if (mutex_unlock(&worker->w_event_mutex) != 0)
|
|
|
|
panic("unable to unlock event mutex");
|
2011-08-17 15:23:45 +02:00
|
|
|
self = worker;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_wake *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void worker_wake(struct worker_thread *worker)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
|
|
|
/* Signal a worker to wake up */
|
|
|
|
ASSERTW(worker);
|
2011-12-09 15:46:10 +01:00
|
|
|
if (mutex_lock(&worker->w_event_mutex) != 0)
|
|
|
|
panic("unable to lock event mutex");
|
|
|
|
if (cond_signal(&worker->w_event) != 0)
|
|
|
|
panic("unable to signal conditional variable");
|
|
|
|
if (mutex_unlock(&worker->w_event_mutex) != 0)
|
|
|
|
panic("unable to unlock event mutex");
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_suspend *
|
|
|
|
*===========================================================================*/
|
|
|
|
struct worker_thread *worker_suspend(void)
|
|
|
|
{
|
|
|
|
/* Suspend the current thread, saving certain thread variables. Return a
|
|
|
|
* pointer to the thread's worker structure for later resumption.
|
|
|
|
*/
|
|
|
|
|
|
|
|
ASSERTW(self);
|
|
|
|
assert(fp != NULL);
|
|
|
|
assert(self->w_fp == fp);
|
|
|
|
assert(fp->fp_worker == self);
|
|
|
|
|
|
|
|
self->w_err_code = err_code;
|
|
|
|
|
|
|
|
return self;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_resume *
|
|
|
|
*===========================================================================*/
|
|
|
|
void worker_resume(struct worker_thread *org_self)
|
|
|
|
{
|
|
|
|
/* Resume the current thread after suspension, restoring thread variables. */
|
|
|
|
|
|
|
|
ASSERTW(org_self);
|
|
|
|
|
|
|
|
self = org_self;
|
|
|
|
|
|
|
|
fp = self->w_fp;
|
|
|
|
assert(fp != NULL);
|
|
|
|
|
|
|
|
err_code = self->w_err_code;
|
|
|
|
}
|
|
|
|
|
2011-08-17 15:23:45 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_wait *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void worker_wait(void)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Put the current thread to sleep until woken up by the main thread. */
|
|
|
|
|
|
|
|
(void) worker_suspend(); /* worker_sleep already saves and restores 'self' */
|
|
|
|
|
|
|
|
worker_sleep();
|
|
|
|
|
2011-08-17 15:23:45 +02:00
|
|
|
/* We continue here after waking up */
|
2013-08-30 14:00:50 +02:00
|
|
|
worker_resume(self);
|
2012-04-13 14:50:38 +02:00
|
|
|
assert(self->w_next == NULL);
|
2011-08-17 15:23:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* worker_signal *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void worker_signal(struct worker_thread *worker)
{
/* Wake up the given worker thread, after checking that the pointer refers to
 * a valid slot in the workers[] array.
 */
  ASSERTW(worker);

  worker_wake(worker);
}
|
|
|
|
|
2011-09-06 12:11:18 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_stop *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void worker_stop(struct worker_thread *worker)
|
2011-09-06 12:11:18 +02:00
|
|
|
{
|
|
|
|
ASSERTW(worker); /* Make sure we have a valid thread */
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (worker->w_task != NONE) {
|
|
|
|
/* This thread is communicating with a driver or file server */
|
|
|
|
if (worker->w_drv_sendrec != NULL) { /* Driver */
|
|
|
|
worker->w_drv_sendrec->m_type = EIO;
|
|
|
|
} else if (worker->w_fs_sendrec != NULL) { /* FS */
|
|
|
|
worker->w_fs_sendrec->m_type = EIO;
|
|
|
|
} else {
|
|
|
|
panic("reply storage consistency error"); /* Oh dear */
|
|
|
|
}
|
|
|
|
} else {
|
2013-08-30 14:00:50 +02:00
|
|
|
/* This shouldn't happen at all... */
|
|
|
|
printf("VFS: stopping worker not blocked on any task?\n");
|
|
|
|
util_stacktrace();
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
}
|
2011-09-06 12:11:18 +02:00
|
|
|
worker_wake(worker);
|
|
|
|
}
|
|
|
|
|
2011-09-15 17:39:13 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_stop_by_endpt *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void worker_stop_by_endpt(endpoint_t proc_e)
{
/* Stop every worker thread that is bound to a process and whose w_task equals
 * the given endpoint. Nothing is done if the endpoint is NONE.
 */
  struct worker_thread *wp;

  if (proc_e == NONE)
	return;

  for (wp = &workers[0]; wp < &workers[NR_WTHREADS]; wp++) {
	if (wp->w_fp == NULL)
		continue;		/* thread is not in use */
	if (wp->w_task == proc_e)
		worker_stop(wp);
  }
}
|
|
|
|
|
2011-08-17 15:23:45 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* worker_get *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
struct worker_thread *worker_get(thread_t worker_tid)
{
/* Look up the worker structure that has the given thread ID. Return a pointer
 * to it, or NULL if no worker has that ID.
 */
  struct worker_thread *wp;

  for (wp = &workers[0]; wp < &workers[NR_WTHREADS]; wp++) {
	if (wp->w_tid == worker_tid)
		return(wp);
  }

  return(NULL);
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2013-08-30 14:00:50 +02:00
|
|
|
* worker_set_proc *
|
2011-08-17 15:23:45 +02:00
|
|
|
*===========================================================================*/
|
2013-08-30 14:00:50 +02:00
|
|
|
void worker_set_proc(struct fproc *rfp)
|
2011-08-17 15:23:45 +02:00
|
|
|
{
|
2013-08-30 14:00:50 +02:00
|
|
|
/* Perform an incredibly ugly action that completely violates the threading
|
|
|
|
* model: change the current working thread's process context to another
|
|
|
|
* process. The caller is expected to hold the lock to both the calling and the
|
|
|
|
* target process, and neither process is expected to continue regular
|
|
|
|
* operation when done. This code is here *only* and *strictly* for the reboot
|
|
|
|
* code, and *must not* be used for anything else.
|
|
|
|
*/
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
if (fp == rfp) return;
|
2011-08-17 15:23:45 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
if (rfp->fp_worker != NULL)
|
|
|
|
panic("worker_set_proc: target process not idle");
|
2011-09-15 17:39:13 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
fp->fp_worker = NULL;
|
2011-09-15 17:39:13 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
fp = rfp;
|
2011-09-15 17:39:13 +02:00
|
|
|
|
2013-08-30 14:00:50 +02:00
|
|
|
self->w_fp = rfp;
|
|
|
|
fp->fp_worker = self;
|
2011-09-15 17:39:13 +02:00
|
|
|
}
|