VFS: fix locking bugs
- .sync and fsync used an unnecessarily restrictive locking type
- .fsync violated the locking order by obtaining a vmnt lock after a filp lock
- .fsync contained a TOCTOU bug
- .new_node violated locking rules (didn't upgrade the lock upon file creation)
- .do_pipe used an unnecessarily restrictive locking type
- .always lock pipes exclusively; even a read operation might require a write on the vnode object (to update the pipe size)
- .when opening a file with O_TRUNC, upgrade the vnode lock before truncating
- .utime used an unnecessarily restrictive locking type
- .path parsing:
  - always acquire VMNT_WRITE or VMNT_EXCL on the vmnt and downgrade to VMNT_READ if that was what was actually requested. This prevents the following deadlock scenario:
    - thread A: lock_vmnt(vmp, TLL_READSER); lock_vnode(vp, TLL_READSER); upgrade_vmnt_lock(vmp, TLL_WRITE);
    - thread B: lock_vmnt(vmp, TLL_READ); lock_vnode(vp, TLL_READSER);
    Thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in lock_vnode. This happens when, for example, thread A tries to create a new node (open.c:new_node) and thread B tries to do eat_path to change directory (stadir.c:do_chdir). When the path is being resolved, a vnode is always locked with VNODE_OPCL (TLL_READSER) and then downgraded to VNODE_READ if read-only access is actually requested. Thread A locks the vmnt with VMNT_WRITE (TLL_READSER), which still allows VMNT_READ locks. Thread B can't acquire a lock on the vnode because thread A holds it; thread A can't upgrade its vmnt lock to VMNT_EXCL (TLL_WRITE) because thread B holds a VMNT_READ lock on it. By serializing vmnt locks during path parsing, thread B can only acquire a lock on vmp when thread A has completely finished its operation.
This commit is contained in:
parent
ecf9b40841
commit
7c8b3ddfed
|
@ -325,6 +325,12 @@ tll_access_t locktype;
|
|||
assert(filp->filp_softlock == NULL);
|
||||
filp->filp_softlock = fp;
|
||||
} else {
|
||||
/* We have to make an exception for vnodes belonging to pipes. Even
|
||||
* read(2) operations on pipes change the vnode and therefore require
|
||||
* exclusive access.
|
||||
*/
|
||||
if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
|
||||
locktype = VNODE_WRITE;
|
||||
lock_vnode(vp, locktype);
|
||||
}
|
||||
|
||||
|
|
|
@ -165,7 +165,7 @@ int do_unlink()
|
|||
}
|
||||
|
||||
assert(vmp != NULL);
|
||||
tll_upgrade(&vmp->m_lock);
|
||||
upgrade_vmnt_lock(vmp);
|
||||
|
||||
if (job_call_nr == UNLINK)
|
||||
r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
|
||||
|
@ -261,7 +261,7 @@ int do_rename()
|
|||
(r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1;
|
||||
|
||||
if (r == OK) {
|
||||
tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
|
||||
upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */
|
||||
r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
|
||||
new_dirp->v_inode_nr, fullpath);
|
||||
}
|
||||
|
|
|
@ -304,7 +304,7 @@ int do_sync()
|
|||
int r = OK;
|
||||
|
||||
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
||||
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
|
||||
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
||||
break;
|
||||
if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
|
||||
vmp->m_root_node != NULL) {
|
||||
|
@ -331,20 +331,22 @@ int do_fsync()
|
|||
|
||||
if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
|
||||
return(err_code);
|
||||
|
||||
dev = rfilp->filp_vno->v_dev;
|
||||
unlock_filp(rfilp);
|
||||
|
||||
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
||||
if (vmp->m_dev != dev) continue;
|
||||
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
||||
break;
|
||||
if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
|
||||
vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
|
||||
|
||||
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
|
||||
break;
|
||||
req_sync(vmp->m_fs_e);
|
||||
unlock_vmnt(vmp);
|
||||
}
|
||||
unlock_vmnt(vmp);
|
||||
}
|
||||
|
||||
unlock_filp(rfilp);
|
||||
|
||||
return(r);
|
||||
}
|
||||
|
||||
|
|
|
@ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
|
|||
if (oflags & O_TRUNC) {
|
||||
if ((r = forbidden(fp, vp, W_BIT)) != OK)
|
||||
break;
|
||||
upgrade_vnode_lock(vp);
|
||||
truncate_vnode(vp, 0);
|
||||
}
|
||||
break;
|
||||
|
@ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
|
|||
case S_IFIFO:
|
||||
/* Create a mapped inode on PFS which handles reads
|
||||
and writes to this named pipe. */
|
||||
tll_upgrade(&vp->v_lock);
|
||||
upgrade_vnode_lock(vp);
|
||||
r = map_vnode(vp, PFS_PROC_NR);
|
||||
if (r == OK) {
|
||||
if (vp->v_ref_count == 1) {
|
||||
|
@ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
|
|||
}
|
||||
|
||||
lock_vnode(vp, VNODE_OPCL);
|
||||
upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */
|
||||
|
||||
if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK ||
|
||||
(r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
|
||||
|
@ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
|
|||
/* Can't create inode either due to permissions or some other
|
||||
* problem. In case r is EEXIST, we might be dealing with a
|
||||
* dangling symlink.*/
|
||||
|
||||
/* Downgrade lock to prevent deadlock during symlink resolving*/
|
||||
downgrade_vmnt_lock(dir_vmp);
|
||||
|
||||
if (r == EEXIST) {
|
||||
struct vnode *slp, *old_wd;
|
||||
|
||||
|
||||
/* Resolve path up to symlink */
|
||||
findnode.l_flags = PATH_RET_SYMLINK;
|
||||
findnode.l_vnode_lock = VNODE_READ;
|
||||
|
|
|
@ -398,6 +398,7 @@ struct fproc *rfp;
|
|||
struct vnode *dir_vp;
|
||||
struct vmnt *vmp, *vmpres;
|
||||
struct lookup_res res;
|
||||
tll_access_t mnt_lock_type;
|
||||
|
||||
assert(resolve->l_vmp);
|
||||
assert(resolve->l_vnode);
|
||||
|
@ -435,7 +436,12 @@ struct fproc *rfp;
|
|||
symloop = 0; /* Number of symlinks seen so far */
|
||||
|
||||
/* Lock vmnt */
|
||||
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
|
||||
if (resolve->l_vmnt_lock == VMNT_READ)
|
||||
mnt_lock_type = VMNT_WRITE;
|
||||
else
|
||||
mnt_lock_type = resolve->l_vmnt_lock;
|
||||
|
||||
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
|
||||
if (r == EBUSY) /* vmnt already locked */
|
||||
vmpres = NULL;
|
||||
else
|
||||
|
@ -532,7 +538,7 @@ struct fproc *rfp;
|
|||
if (vmpres) unlock_vmnt(vmpres);
|
||||
vmpres = find_vmnt(fs_e);
|
||||
if (vmpres == NULL) return(EIO); /* mount point vanished? */
|
||||
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
|
||||
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
|
||||
if (r == EBUSY)
|
||||
vmpres = NULL; /* Already locked */
|
||||
else
|
||||
|
@ -549,6 +555,11 @@ struct fproc *rfp;
|
|||
}
|
||||
}
|
||||
|
||||
if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) {
|
||||
/* downgrade VMNT_WRITE to VMNT_READ */
|
||||
downgrade_vmnt_lock(*(resolve->l_vmp));
|
||||
}
|
||||
|
||||
/* Fill in response fields */
|
||||
result_node->inode_nr = res.inode_nr;
|
||||
result_node->fmode = res.fmode;
|
||||
|
|
|
@ -52,7 +52,7 @@ int do_pipe()
|
|||
|
||||
/* Get a lock on PFS */
|
||||
if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
|
||||
if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r);
|
||||
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r);
|
||||
|
||||
/* See if a free vnode is available */
|
||||
if ((vp = get_free_vnode()) == NULL) {
|
||||
|
|
|
@ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype);
|
|||
void unlock_vmnt(struct vmnt *vp);
|
||||
void vmnt_unmap_by_endpt(endpoint_t proc_e);
|
||||
void fetch_vmnt_paths(void);
|
||||
void upgrade_vmnt_lock(struct vmnt *vmp);
|
||||
void downgrade_vmnt_lock(struct vmnt *vmp);
|
||||
|
||||
/* vnode.c */
|
||||
void check_vnode_locks(void);
|
||||
|
@ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp);
|
|||
void dup_vnode(struct vnode *vp);
|
||||
void put_vnode(struct vnode *vp);
|
||||
void vnode_clean_refs(struct vnode *vp);
|
||||
void upgrade_vnode_lock(struct vnode *vp);
|
||||
|
||||
/* write.c */
|
||||
int do_write(void);
|
||||
|
|
|
@ -267,7 +267,7 @@ size_t req_size;
|
|||
u64_t position, new_pos;
|
||||
|
||||
/* Must make sure we're operating on locked filp and vnode */
|
||||
assert(tll_islocked(&f->filp_vno->v_lock));
|
||||
assert(tll_locked_by_me(&f->filp_vno->v_lock));
|
||||
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
|
||||
|
||||
oflags = f->filp_flags;
|
||||
|
|
|
@ -41,7 +41,7 @@ int do_utime()
|
|||
if (len == 0) len = (size_t) job_m_in.utime_strlen;
|
||||
|
||||
lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
|
||||
resolve.l_vmnt_lock = VMNT_WRITE;
|
||||
resolve.l_vmnt_lock = VMNT_READ;
|
||||
resolve.l_vnode_lock = VNODE_READ;
|
||||
|
||||
/* Temporarily open the file */
|
||||
|
|
|
@ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype)
|
|||
* request queued ("write bias") or when a read-serialized lock is trying to
|
||||
* upgrade to write-only. The current lock for this tll is either read or
|
||||
* read-serialized. */
|
||||
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
|
||||
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) {
|
||||
assert(!(tllp->t_status & TLL_PEND));
|
||||
return tll_append(tllp, locktype);
|
||||
}
|
||||
|
||||
/* If this lock is in read-serialized mode, we can allow read requests and
|
||||
* queue read-serialized requests */
|
||||
if (tllp->t_current == TLL_READSER) {
|
||||
if (locktype == TLL_READ) {
|
||||
if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) {
|
||||
tllp->t_readonly++;
|
||||
return(OK);
|
||||
} else
|
||||
|
|
|
@ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
|
|||
if (r == EBUSY) return(r);
|
||||
|
||||
if (initial_locktype != locktype) {
|
||||
tll_upgrade(&vmp->m_lock);
|
||||
upgrade_vmnt_lock(vmp);
|
||||
}
|
||||
|
||||
#if LOCK_DEBUG
|
||||
|
@ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp)
|
|||
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* downgrade_vmnt_lock *
|
||||
*===========================================================================*/
|
||||
void downgrade_vmnt_lock(struct vmnt *vmp)
|
||||
{
|
||||
ASSERTVMP(vmp);
|
||||
tll_downgrade(&vmp->m_lock);
|
||||
|
||||
#if LOCK_DEBUG
|
||||
/* If we're no longer the owner of a lock, we downgraded to VMNT_READ */
|
||||
if (!tll_locked_by_me(&vmp->m_lock)) {
|
||||
fp->fp_vmnt_rdlocks++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* upgrade_vmnt_lock *
|
||||
*===========================================================================*/
|
||||
void upgrade_vmnt_lock(struct vmnt *vmp)
|
||||
{
|
||||
ASSERTVMP(vmp);
|
||||
tll_upgrade(&vmp->m_lock);
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* fetch_vmnt_paths *
|
||||
*===========================================================================*/
|
||||
|
|
|
@ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp)
|
|||
tll_unlock(&vp->v_lock);
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* vnode *
|
||||
*===========================================================================*/
|
||||
void upgrade_vnode_lock(struct vnode *vp)
|
||||
{
|
||||
ASSERTVP(vp);
|
||||
tll_upgrade(&vp->v_lock);
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* dup_vnode *
|
||||
*===========================================================================*/
|
||||
|
@ -259,7 +268,7 @@ void put_vnode(struct vnode *vp)
|
|||
|
||||
/* If we already had a lock, there is a consistency problem */
|
||||
assert(lock_vp != EBUSY);
|
||||
tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */
|
||||
upgrade_vnode_lock(vp); /* Acquire exclusive access */
|
||||
|
||||
/* A vnode that's not in use can't be put back. */
|
||||
if (vp->v_ref_count <= 0)
|
||||
|
|
Loading…
Reference in a new issue