AVFS: fix various system call interruption issues

- When cancelling ioctls, VFS did not remember which file descriptor
   to cancel and sent bogus to the driver.
 - Select state was not cleaned up when select()ing process was
   interrupted.
 - Process trying to do a system call at the exact same time as a user
   trying to interrupt the process, could cause the system call worker
   thread to overwrite state belonging to the worker thread trying to
   exit the process. This led to hanging threads and eventual system hang
   when this happens often enough.
This commit is contained in:
Thomas Veerman 2012-02-09 14:24:28 +00:00
parent 4498750810
commit abd6043a2f
7 changed files with 74 additions and 47 deletions

View file

@ -547,7 +547,7 @@ PUBLIC int gen_opcl(
if (op == DEV_OPEN && dp->dmap_style == STYLE_DEVA) { if (op == DEV_OPEN && dp->dmap_style == STYLE_DEVA) {
fp->fp_task = dp->dmap_driver; fp->fp_task = dp->dmap_driver;
worker_wait(dp->dmap_driver); worker_wait();
} }
if (is_bdev) if (is_bdev)
@ -650,7 +650,10 @@ PUBLIC int do_ioctl()
register struct vnode *vp; register struct vnode *vp;
dev_t dev; dev_t dev;
if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code); scratch(fp).file.fd_nr = m_in.ls_fd;
if ((f = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
return(err_code);
vp = f->filp_vno; /* get vnode pointer */ vp = f->filp_vno; /* get vnode pointer */
if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL && if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL &&
(vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) { (vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) {
@ -860,6 +863,7 @@ PUBLIC int clone_opcl(
if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) { if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) {
/* Wait for reply when driver is asynchronous */ /* Wait for reply when driver is asynchronous */
fp->fp_task = dp->dmap_driver;
worker_wait(); worker_wait();
} }

View file

@ -63,6 +63,7 @@ EXTERN struct fproc {
#define FP_EXITING 0020 /* Set if process is exiting */ #define FP_EXITING 0020 /* Set if process is exiting */
#define FP_PM_PENDING 0040 /* Set if process has pending PM request */ #define FP_PM_PENDING 0040 /* Set if process has pending PM request */
#define FP_SYS_PROC 0100 /* Set if process is a driver or FS */ #define FP_SYS_PROC 0100 /* Set if process is a driver or FS */
#define FP_DROP_WORK 0200 /* Set if process won't accept new work */
/* Field values. */ /* Field values. */
#define NOT_REVIVING 0xC0FFEEE /* process is not being revived */ #define NOT_REVIVING 0xC0FFEEE /* process is not being revived */

View file

@ -272,10 +272,11 @@ PRIVATE void *do_fs_reply(struct job *job)
return(NULL); return(NULL);
} }
if (rfp->fp_task != who_e)
printf("AVFS: expected %d to reply, not %d\n", rfp->fp_task, who_e);
*rfp->fp_sendrec = m_in; *rfp->fp_sendrec = m_in;
rfp->fp_task = NONE; rfp->fp_task = NONE;
vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */ vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */ worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */
return(NULL); return(NULL);
} }
@ -424,8 +425,6 @@ PRIVATE void *do_work(void *arg)
*/ */
if (call_nr < 0 || call_nr >= NCALLS) { if (call_nr < 0 || call_nr >= NCALLS) {
error = ENOSYS; error = ENOSYS;
} else if (fp->fp_flags & FP_EXITING) {
error = SUSPEND;
} else if (fp->fp_pid == PID_FREE) { } else if (fp->fp_pid == PID_FREE) {
/* Process vanished before we were able to handle request. /* Process vanished before we were able to handle request.
* Replying has no use. Just drop it. */ * Replying has no use. Just drop it. */
@ -643,8 +642,8 @@ PUBLIC void lock_proc(struct fproc *rfp, int force_lock)
org_m_in = m_in; org_m_in = m_in;
org_fp = fp; org_fp = fp;
org_self = self; org_self = self;
if (mutex_lock(&rfp->fp_lock) != 0) if ((r = mutex_lock(&rfp->fp_lock)) != 0)
panic("unable to lock fproc lock"); panic("unable to lock fproc lock: %d", r);
m_in = org_m_in; m_in = org_m_in;
fp = org_fp; fp = org_fp;
self = org_self; self = org_self;
@ -696,7 +695,10 @@ PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l)
} }
#endif #endif
if (rfp != NULL) unlock_proc(rfp); if (rfp != NULL) {
rfp->fp_flags &= ~FP_DROP_WORK;
unlock_proc(rfp);
}
#if 0 #if 0
mthread_exit(NULL); mthread_exit(NULL);
@ -903,6 +905,7 @@ PRIVATE void service_pm()
if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) { if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) {
mutex_unlock(&fp->fp_lock); mutex_unlock(&fp->fp_lock);
worker_start(do_dummy); worker_start(do_dummy);
fp->fp_flags |= FP_DROP_WORK;
} }
fp->fp_job.j_m_in = m_in; fp->fp_job.j_m_in = m_in;

View file

@ -457,6 +457,8 @@ PRIVATE void free_proc(struct fproc *exiter, int flags)
/* The rest of these actions is only done when processes actually exit. */ /* The rest of these actions is only done when processes actually exit. */
if (!(flags & FP_EXITING)) return; if (!(flags & FP_EXITING)) return;
exiter->fp_flags |= FP_EXITING;
/* Check if any process is SUSPENDed on this driver. /* Check if any process is SUSPENDed on this driver.
* If a driver exits, unmap its entries in the dmap table. * If a driver exits, unmap its entries in the dmap table.
* (unmapping has to be done after the first step, because the * (unmapping has to be done after the first step, because the
@ -471,7 +473,6 @@ PRIVATE void free_proc(struct fproc *exiter, int flags)
/* Invalidate endpoint number for error and sanity checks. */ /* Invalidate endpoint number for error and sanity checks. */
exiter->fp_endpoint = NONE; exiter->fp_endpoint = NONE;
exiter->fp_flags |= FP_EXITING;
/* If a session leader exits and it has a controlling tty, then revoke /* If a session leader exits and it has a controlling tty, then revoke
* access to its controlling tty from all other processes using it. * access to its controlling tty from all other processes using it.

View file

@ -515,8 +515,7 @@ int returned; /* if hanging on task, how many bytes read */
/*===========================================================================* /*===========================================================================*
* unpause * * unpause *
*===========================================================================*/ *===========================================================================*/
PUBLIC void unpause(proc_nr_e) PUBLIC void unpause(endpoint_t proc_e)
int proc_nr_e;
{ {
/* A signal has been sent to a user who is paused on the file system. /* A signal has been sent to a user who is paused on the file system.
* Abort the system call with the EINTR error message. * Abort the system call with the EINTR error message.
@ -529,8 +528,8 @@ int proc_nr_e;
message mess; message mess;
int wasreviving = 0; int wasreviving = 0;
if (isokendpt(proc_nr_e, &slot) != OK) { if (isokendpt(proc_e, &slot) != OK) {
printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e); printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_e);
return; return;
} }
@ -552,7 +551,7 @@ int proc_nr_e;
break; break;
case FP_BLOCKED_ON_SELECT:/* process blocking on select() */ case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
select_forget(proc_nr_e); select_forget(proc_e);
break; break;
case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */ case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */
@ -614,7 +613,7 @@ int proc_nr_e;
susp_count--; susp_count--;
} }
reply(proc_nr_e, status); /* signal interrupted call */ reply(proc_e, status); /* signal interrupted call */
} }
#if DO_SANITYCHECKS #if DO_SANITYCHECKS

View file

@ -593,9 +593,6 @@ PRIVATE void select_cancel_all(struct selectentry *se)
int fd; int fd;
struct filp *f; struct filp *f;
/* Always await results of asynchronous requests */
assert(!is_deferred(se));
for (fd = 0; fd < se->nfds; fd++) { for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL) continue; if ((f = se->filps[fd]) == NULL) continue;
se->filps[fd] = NULL; se->filps[fd] = NULL;
@ -620,6 +617,7 @@ PRIVATE void select_cancel_filp(struct filp *f)
assert(f); assert(f);
assert(f->filp_selectors >= 0); assert(f->filp_selectors >= 0);
if (f->filp_selectors == 0) return; if (f->filp_selectors == 0) return;
if (f->filp_count == 0) return;
select_lock_filp(f, f->filp_select_ops); select_lock_filp(f, f->filp_select_ops);
@ -644,6 +642,7 @@ PRIVATE void select_return(struct selectentry *se)
assert(!is_deferred(se)); /* Not done yet, first wait for async reply */ assert(!is_deferred(se)); /* Not done yet, first wait for async reply */
select_cancel_all(se); select_cancel_all(se);
r1 = copy_fdsets(se, se->nfds, TO_PROC); r1 = copy_fdsets(se, se->nfds, TO_PROC);
if (r1 != OK) if (r1 != OK)
r = r1; r = r1;
@ -737,6 +736,11 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e)
int wakehim = 0; int wakehim = 0;
se = &selecttab[s]; se = &selecttab[s];
if (se->requestor == NULL) continue; if (se->requestor == NULL) continue;
if (se->requestor->fp_endpoint == proc_e) {
assert(se->requestor->fp_flags & FP_EXITING);
select_cancel_all(se);
continue;
}
for (fd = 0; fd < se->nfds; fd++) { for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL) if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL)
@ -756,7 +760,6 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e)
} }
} }
/*===========================================================================* /*===========================================================================*
* select_reply1 * * select_reply1 *
*===========================================================================*/ *===========================================================================*/
@ -800,39 +803,51 @@ int status;
} }
} }
select_lock_filp(f, f->filp_select_ops);
/* No longer waiting for a reply from this device */ /* No longer waiting for a reply from this device */
f->filp_select_flags &= ~FSF_BUSY;
dp->dmap_sel_filp = NULL; dp->dmap_sel_filp = NULL;
/* The select call is done now, except when /* Process select result only if requestor is still around. That is, the
* - another process started a select on the same filp with possibly a * corresponding filp is still in use.
* different set of operations. */
* - a process does a select on the same filp but using different file if (f->filp_count >= 1) {
* descriptors. select_lock_filp(f, f->filp_select_ops);
* - the select has a timeout. Upon receiving this reply the operations might f->filp_select_flags &= ~FSF_BUSY;
* not be ready yet, so we want to wait for that to ultimately happen.
* Therefore we need to keep remembering what the operations are. */
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (!(f->filp_select_flags & FSF_UPDATE))
f->filp_select_ops &= ~status; /* there may be operations pending */
/* Tell filp owners about result unless we need to wait longer */ /* The select call is done now, except when
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { * - another process started a select on the same filp with possibly a
if (status > 0) { /* operations ready */ * different set of operations.
if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK; * - a process does a select on the same filp but using different file
if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK; * descriptors.
if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK; * - the select has a timeout. Upon receiving this reply the operations
} else if (status < 0) { /* error */ * might not be ready yet, so we want to wait for that to ultimately
f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */ * happen.
* Therefore we need to keep remembering what the operations are.
*/
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (!(f->filp_select_flags & FSF_UPDATE))
/* there may be operations pending */
f->filp_select_ops &= ~status;
/* Tell filp owners about result unless we need to wait longer */
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
if (status > 0) { /* operations ready */
if (status & SEL_RD)
f->filp_select_flags &= ~FSF_RD_BLOCK;
if (status & SEL_WR)
f->filp_select_flags &= ~FSF_WR_BLOCK;
if (status & SEL_ERR)
f->filp_select_flags &= ~FSF_ERR_BLOCK;
} else if (status < 0) { /* error */
/* Always unblock upon error */
f->filp_select_flags &= ~FSF_BLOCKED;
}
unlock_filp(f);
filp_status(f, status); /* Tell filp owners about the results */
} else {
unlock_filp(f);
} }
unlock_filp(f);
filp_status(f, status); /* Tell filp owners about the results */
} else {
unlock_filp(f);
} }
select_restart_filps(); select_restart_filps();

View file

@ -208,6 +208,10 @@ PUBLIC void worker_start(void *(*func)(void *arg))
int i; int i;
struct worker_thread *worker; struct worker_thread *worker;
if (fp->fp_flags & FP_DROP_WORK) {
return; /* This process is not allowed to accept new work */
}
worker = NULL; worker = NULL;
for (i = 0; i < NR_WTHREADS; i++) { for (i = 0; i < NR_WTHREADS; i++) {
if (workers[i].w_job.j_func == NULL) { if (workers[i].w_job.j_func == NULL) {