AVFS: fix various system call interruption issues

- When cancelling ioctls, VFS did not remember which file descriptor
   to cancel and sent bogus to the driver.
 - Select state was not cleaned up when select()ing process was
   interrupted.
 - Process trying to do a system call at the exact same time as a user
   trying to interrupt the process, could cause the system call worker
   thread to overwrite state belonging to the worker thread trying to
   exit the process. This led to hanging threads and eventual system hang
   when this happens often enough.
This commit is contained in:
Thomas Veerman 2012-02-09 14:24:28 +00:00
parent 4498750810
commit abd6043a2f
7 changed files with 74 additions and 47 deletions

View file

@ -547,7 +547,7 @@ PUBLIC int gen_opcl(
if (op == DEV_OPEN && dp->dmap_style == STYLE_DEVA) {
fp->fp_task = dp->dmap_driver;
worker_wait(dp->dmap_driver);
worker_wait();
}
if (is_bdev)
@ -650,7 +650,10 @@ PUBLIC int do_ioctl()
register struct vnode *vp;
dev_t dev;
if ((f = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
scratch(fp).file.fd_nr = m_in.ls_fd;
if ((f = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
return(err_code);
vp = f->filp_vno; /* get vnode pointer */
if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL &&
(vp->v_mode & I_TYPE) != I_BLOCK_SPECIAL) {
@ -860,6 +863,7 @@ PUBLIC int clone_opcl(
if (op == DEV_OPEN && dp->dmap_style == STYLE_CLONE_A) {
/* Wait for reply when driver is asynchronous */
fp->fp_task = dp->dmap_driver;
worker_wait();
}

View file

@ -63,6 +63,7 @@ EXTERN struct fproc {
#define FP_EXITING 0020 /* Set if process is exiting */
#define FP_PM_PENDING 0040 /* Set if process has pending PM request */
#define FP_SYS_PROC 0100 /* Set if process is a driver or FS */
#define FP_DROP_WORK 0200 /* Set if process won't accept new work */
/* Field values. */
#define NOT_REVIVING 0xC0FFEEE /* process is not being revived */

View file

@ -272,10 +272,11 @@ PRIVATE void *do_fs_reply(struct job *job)
return(NULL);
}
if (rfp->fp_task != who_e)
printf("AVFS: expected %d to reply, not %d\n", rfp->fp_task, who_e);
*rfp->fp_sendrec = m_in;
rfp->fp_task = NONE;
vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
worker_signal(worker_get(rfp->fp_wtid));/* Continue this worker thread */
return(NULL);
}
@ -424,8 +425,6 @@ PRIVATE void *do_work(void *arg)
*/
if (call_nr < 0 || call_nr >= NCALLS) {
error = ENOSYS;
} else if (fp->fp_flags & FP_EXITING) {
error = SUSPEND;
} else if (fp->fp_pid == PID_FREE) {
/* Process vanished before we were able to handle request.
* Replying has no use. Just drop it. */
@ -643,8 +642,8 @@ PUBLIC void lock_proc(struct fproc *rfp, int force_lock)
org_m_in = m_in;
org_fp = fp;
org_self = self;
if (mutex_lock(&rfp->fp_lock) != 0)
panic("unable to lock fproc lock");
if ((r = mutex_lock(&rfp->fp_lock)) != 0)
panic("unable to lock fproc lock: %d", r);
m_in = org_m_in;
fp = org_fp;
self = org_self;
@ -696,7 +695,10 @@ PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l)
}
#endif
if (rfp != NULL) unlock_proc(rfp);
if (rfp != NULL) {
rfp->fp_flags &= ~FP_DROP_WORK;
unlock_proc(rfp);
}
#if 0
mthread_exit(NULL);
@ -903,6 +905,7 @@ PRIVATE void service_pm()
if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) {
mutex_unlock(&fp->fp_lock);
worker_start(do_dummy);
fp->fp_flags |= FP_DROP_WORK;
}
fp->fp_job.j_m_in = m_in;

View file

@ -457,6 +457,8 @@ PRIVATE void free_proc(struct fproc *exiter, int flags)
/* The rest of these actions is only done when processes actually exit. */
if (!(flags & FP_EXITING)) return;
exiter->fp_flags |= FP_EXITING;
/* Check if any process is SUSPENDed on this driver.
* If a driver exits, unmap its entries in the dmap table.
* (unmapping has to be done after the first step, because the
@ -471,7 +473,6 @@ PRIVATE void free_proc(struct fproc *exiter, int flags)
/* Invalidate endpoint number for error and sanity checks. */
exiter->fp_endpoint = NONE;
exiter->fp_flags |= FP_EXITING;
/* If a session leader exits and it has a controlling tty, then revoke
* access to its controlling tty from all other processes using it.

View file

@ -515,8 +515,7 @@ int returned; /* if hanging on task, how many bytes read */
/*===========================================================================*
* unpause *
*===========================================================================*/
PUBLIC void unpause(proc_nr_e)
int proc_nr_e;
PUBLIC void unpause(endpoint_t proc_e)
{
/* A signal has been sent to a user who is paused on the file system.
* Abort the system call with the EINTR error message.
@ -529,8 +528,8 @@ int proc_nr_e;
message mess;
int wasreviving = 0;
if (isokendpt(proc_nr_e, &slot) != OK) {
printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e);
if (isokendpt(proc_e, &slot) != OK) {
printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_e);
return;
}
@ -552,7 +551,7 @@ int proc_nr_e;
break;
case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
select_forget(proc_nr_e);
select_forget(proc_e);
break;
case FP_BLOCKED_ON_POPEN: /* process trying to open a fifo */
@ -614,7 +613,7 @@ int proc_nr_e;
susp_count--;
}
reply(proc_nr_e, status); /* signal interrupted call */
reply(proc_e, status); /* signal interrupted call */
}
#if DO_SANITYCHECKS

View file

@ -593,9 +593,6 @@ PRIVATE void select_cancel_all(struct selectentry *se)
int fd;
struct filp *f;
/* Always await results of asynchronous requests */
assert(!is_deferred(se));
for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL) continue;
se->filps[fd] = NULL;
@ -620,6 +617,7 @@ PRIVATE void select_cancel_filp(struct filp *f)
assert(f);
assert(f->filp_selectors >= 0);
if (f->filp_selectors == 0) return;
if (f->filp_count == 0) return;
select_lock_filp(f, f->filp_select_ops);
@ -644,6 +642,7 @@ PRIVATE void select_return(struct selectentry *se)
assert(!is_deferred(se)); /* Not done yet, first wait for async reply */
select_cancel_all(se);
r1 = copy_fdsets(se, se->nfds, TO_PROC);
if (r1 != OK)
r = r1;
@ -737,6 +736,11 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e)
int wakehim = 0;
se = &selecttab[s];
if (se->requestor == NULL) continue;
if (se->requestor->fp_endpoint == proc_e) {
assert(se->requestor->fp_flags & FP_EXITING);
select_cancel_all(se);
continue;
}
for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL)
@ -756,7 +760,6 @@ PUBLIC void select_unsuspend_by_endpt(endpoint_t proc_e)
}
}
/*===========================================================================*
* select_reply1 *
*===========================================================================*/
@ -800,39 +803,51 @@ int status;
}
}
select_lock_filp(f, f->filp_select_ops);
/* No longer waiting for a reply from this device */
f->filp_select_flags &= ~FSF_BUSY;
dp->dmap_sel_filp = NULL;
/* The select call is done now, except when
* - another process started a select on the same filp with possibly a
* different set of operations.
* - a process does a select on the same filp but using different file
* descriptors.
* - the select has a timeout. Upon receiving this reply the operations might
* not be ready yet, so we want to wait for that to ultimately happen.
* Therefore we need to keep remembering what the operations are. */
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (!(f->filp_select_flags & FSF_UPDATE))
f->filp_select_ops &= ~status; /* there may be operations pending */
/* Process select result only if requestor is still around. That is, the
* corresponding filp is still in use.
*/
if (f->filp_count >= 1) {
select_lock_filp(f, f->filp_select_ops);
f->filp_select_flags &= ~FSF_BUSY;
/* Tell filp owners about result unless we need to wait longer */
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
if (status > 0) { /* operations ready */
if (status & SEL_RD) f->filp_select_flags &= ~FSF_RD_BLOCK;
if (status & SEL_WR) f->filp_select_flags &= ~FSF_WR_BLOCK;
if (status & SEL_ERR) f->filp_select_flags &= ~FSF_ERR_BLOCK;
} else if (status < 0) { /* error */
f->filp_select_flags &= ~FSF_BLOCKED; /* No longer blocking */
/* The select call is done now, except when
* - another process started a select on the same filp with possibly a
* different set of operations.
* - a process does a select on the same filp but using different file
* descriptors.
* - the select has a timeout. Upon receiving this reply the operations
* might not be ready yet, so we want to wait for that to ultimately
* happen.
* Therefore we need to keep remembering what the operations are.
*/
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (!(f->filp_select_flags & FSF_UPDATE))
/* there may be operations pending */
f->filp_select_ops &= ~status;
/* Tell filp owners about result unless we need to wait longer */
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
if (status > 0) { /* operations ready */
if (status & SEL_RD)
f->filp_select_flags &= ~FSF_RD_BLOCK;
if (status & SEL_WR)
f->filp_select_flags &= ~FSF_WR_BLOCK;
if (status & SEL_ERR)
f->filp_select_flags &= ~FSF_ERR_BLOCK;
} else if (status < 0) { /* error */
/* Always unblock upon error */
f->filp_select_flags &= ~FSF_BLOCKED;
}
unlock_filp(f);
filp_status(f, status); /* Tell filp owners about the results */
} else {
unlock_filp(f);
}
unlock_filp(f);
filp_status(f, status); /* Tell filp owners about the results */
} else {
unlock_filp(f);
}
select_restart_filps();

View file

@ -208,6 +208,10 @@ PUBLIC void worker_start(void *(*func)(void *arg))
int i;
struct worker_thread *worker;
if (fp->fp_flags & FP_DROP_WORK) {
return; /* This process is not allowed to accept new work */
}
worker = NULL;
for (i = 0; i < NR_WTHREADS; i++) {
if (workers[i].w_job.j_func == NULL) {