VFS: fix locking bugs

.sync and fsync used unnecessarily restrictive locking type
.fsync violated locking order by obtaining a vmnt lock after a filp lock
.fsync contained a TOCTOU bug
.new_node violated locking rules (didn't upgrade lock upon file creation)
.do_pipe used unnecessarily restrictive locking type
.always lock pipes exclusively; even a read operation might need to do
 a write on a vnode object (update pipe size)
.when opening a file with O_TRUNC, upgrade vnode lock when truncating
.utime used unnecessarily restrictive locking type
.path parsing:
  .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to
   VMNT_READ if that was what was actually requested. This prevents the
   following deadlock scenario:
   thread A:
     lock_vmnt(vmp, TLL_READSER);
     lock_vnode(vp, TLL_READSER);
     upgrade_vmnt_lock(vmp, TLL_WRITE);

   thread B:
     lock_vmnt(vmp, TLL_READ);
     lock_vnode(vp, TLL_READSER);

   thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in
   lock_vnode. This happens when, for example, thread A tries to create
   a new node (open.c:new_node) and thread B tries to do eat_path to
   change dir (stadir.c:do_chdir). When the path is being resolved, a
   vnode is always locked with VNODE_OPCL (TLL_READSER) and then
   downgraded to VNODE_READ if read-only is actually requested. Thread
   A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows
   VMNT_READ locks. Thread B can't acquire a lock on the vnode because
   thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE
   (TLL_WRITE) because thread B has a VMNT_READ lock on it.

   By serializing vmnt locks during path parsing, thread B can only
   acquire a lock on vmp when thread A has completely finished its
   operation.
This commit is contained in:
Thomas Veerman 2012-11-30 12:49:53 +00:00
parent ecf9b40841
commit 7c8b3ddfed
12 changed files with 83 additions and 18 deletions

View file

@ -325,6 +325,12 @@ tll_access_t locktype;
assert(filp->filp_softlock == NULL);
filp->filp_softlock = fp;
} else {
/* We have to make an exception for vnodes belonging to pipes. Even
* read(2) operations on pipes change the vnode and therefore require
* exclusive access.
*/
if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
locktype = VNODE_WRITE;
lock_vnode(vp, locktype);
}

View file

@ -165,7 +165,7 @@ int do_unlink()
}
assert(vmp != NULL);
tll_upgrade(&vmp->m_lock);
upgrade_vmnt_lock(vmp);
if (job_call_nr == UNLINK)
r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
@ -261,7 +261,7 @@ int do_rename()
(r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1;
if (r == OK) {
tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */
r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
new_dirp->v_inode_nr, fullpath);
}

View file

@ -304,7 +304,7 @@ int do_sync()
int r = OK;
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
break;
if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
vmp->m_root_node != NULL) {
@ -331,20 +331,22 @@ int do_fsync()
if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
return(err_code);
dev = rfilp->filp_vno->v_dev;
unlock_filp(rfilp);
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
if (vmp->m_dev != dev) continue;
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
break;
if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
break;
req_sync(vmp->m_fs_e);
unlock_vmnt(vmp);
}
unlock_vmnt(vmp);
}
unlock_filp(rfilp);
return(r);
}

View file

@ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
if (oflags & O_TRUNC) {
if ((r = forbidden(fp, vp, W_BIT)) != OK)
break;
upgrade_vnode_lock(vp);
truncate_vnode(vp, 0);
}
break;
@ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
case S_IFIFO:
/* Create a mapped inode on PFS which handles reads
and writes to this named pipe. */
tll_upgrade(&vp->v_lock);
upgrade_vnode_lock(vp);
r = map_vnode(vp, PFS_PROC_NR);
if (r == OK) {
if (vp->v_ref_count == 1) {
@ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
}
lock_vnode(vp, VNODE_OPCL);
upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */
if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK ||
(r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
@ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
/* Can't create inode either due to permissions or some other
* problem. In case r is EEXIST, we might be dealing with a
* dangling symlink.*/
/* Downgrade lock to prevent deadlock during symlink resolving */
downgrade_vmnt_lock(dir_vmp);
if (r == EEXIST) {
struct vnode *slp, *old_wd;
/* Resolve path up to symlink */
findnode.l_flags = PATH_RET_SYMLINK;
findnode.l_vnode_lock = VNODE_READ;

View file

@ -398,6 +398,7 @@ struct fproc *rfp;
struct vnode *dir_vp;
struct vmnt *vmp, *vmpres;
struct lookup_res res;
tll_access_t mnt_lock_type;
assert(resolve->l_vmp);
assert(resolve->l_vnode);
@ -435,7 +436,12 @@ struct fproc *rfp;
symloop = 0; /* Number of symlinks seen so far */
/* Lock vmnt */
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
if (resolve->l_vmnt_lock == VMNT_READ)
mnt_lock_type = VMNT_WRITE;
else
mnt_lock_type = resolve->l_vmnt_lock;
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
if (r == EBUSY) /* vmnt already locked */
vmpres = NULL;
else
@ -532,7 +538,7 @@ struct fproc *rfp;
if (vmpres) unlock_vmnt(vmpres);
vmpres = find_vmnt(fs_e);
if (vmpres == NULL) return(EIO); /* mount point vanished? */
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
if (r == EBUSY)
vmpres = NULL; /* Already locked */
else
@ -549,6 +555,11 @@ struct fproc *rfp;
}
}
if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) {
/* downgrade VMNT_WRITE to VMNT_READ */
downgrade_vmnt_lock(*(resolve->l_vmp));
}
/* Fill in response fields */
result_node->inode_nr = res.inode_nr;
result_node->fmode = res.fmode;

View file

@ -52,7 +52,7 @@ int do_pipe()
/* Get a lock on PFS */
if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r);
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r);
/* See if a free vnode is available */
if ((vp = get_free_vnode()) == NULL) {

View file

@ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype);
void unlock_vmnt(struct vmnt *vp);
void vmnt_unmap_by_endpt(endpoint_t proc_e);
void fetch_vmnt_paths(void);
void upgrade_vmnt_lock(struct vmnt *vmp);
void downgrade_vmnt_lock(struct vmnt *vmp);
/* vnode.c */
void check_vnode_locks(void);
@ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp);
void dup_vnode(struct vnode *vp);
void put_vnode(struct vnode *vp);
void vnode_clean_refs(struct vnode *vp);
void upgrade_vnode_lock(struct vnode *vp);
/* write.c */
int do_write(void);

View file

@ -267,7 +267,7 @@ size_t req_size;
u64_t position, new_pos;
/* Must make sure we're operating on locked filp and vnode */
assert(tll_islocked(&f->filp_vno->v_lock));
assert(tll_locked_by_me(&f->filp_vno->v_lock));
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
oflags = f->filp_flags;

View file

@ -41,7 +41,7 @@ int do_utime()
if (len == 0) len = (size_t) job_m_in.utime_strlen;
lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
resolve.l_vmnt_lock = VMNT_WRITE;
resolve.l_vmnt_lock = VMNT_READ;
resolve.l_vnode_lock = VNODE_READ;
/* Temporarily open the file */

View file

@ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype)
* request queued ("write bias") or when a read-serialized lock is trying to
* upgrade to write-only. The current lock for this tll is either read or
* read-serialized. */
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) {
assert(!(tllp->t_status & TLL_PEND));
return tll_append(tllp, locktype);
}
/* If this lock is in read-serialized mode, we can allow read requests and
* queue read-serialized requests */
if (tllp->t_current == TLL_READSER) {
if (locktype == TLL_READ) {
if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) {
tllp->t_readonly++;
return(OK);
} else

View file

@ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
if (r == EBUSY) return(r);
if (initial_locktype != locktype) {
tll_upgrade(&vmp->m_lock);
upgrade_vmnt_lock(vmp);
}
#if LOCK_DEBUG
@ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp)
}
/*===========================================================================*
* downgrade_vmnt_lock *
*===========================================================================*/
void downgrade_vmnt_lock(struct vmnt *vmp)
{
ASSERTVMP(vmp);
tll_downgrade(&vmp->m_lock);
#if LOCK_DEBUG
/* If we're no longer the owner of a lock, we downgraded to VMNT_READ */
if (!tll_locked_by_me(&vmp->m_lock)) {
fp->fp_vmnt_rdlocks++;
}
#endif
}
/*===========================================================================*
* upgrade_vmnt_lock *
*===========================================================================*/
void upgrade_vmnt_lock(struct vmnt *vmp)
{
ASSERTVMP(vmp);
tll_upgrade(&vmp->m_lock);
}
/*===========================================================================*
* fetch_vmnt_paths *
*===========================================================================*/

View file

@ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp)
tll_unlock(&vp->v_lock);
}
/*===========================================================================*
* upgrade_vnode_lock *
*===========================================================================*/
void upgrade_vnode_lock(struct vnode *vp)
{
ASSERTVP(vp);
tll_upgrade(&vp->v_lock);
}
/*===========================================================================*
* dup_vnode *
*===========================================================================*/
@ -259,7 +268,7 @@ void put_vnode(struct vnode *vp)
/* If we already had a lock, there is a consistency problem */
assert(lock_vp != EBUSY);
tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */
upgrade_vnode_lock(vp); /* Acquire exclusive access */
/* A vnode that's not in use can't be put back. */
if (vp->v_ref_count <= 0)