VFS: fix locking bugs
.sync and fsync used unnecessarily restrictive locking type .fsync violated locking order by obtaining a vmnt lock after a filp lock .fsync contained a TOCTOU bug .new_node violated locking rules (didn't upgrade lock upon file creation) .do_pipe used unnecessarily restrictive locking type .always lock pipes exclusively; even a read operation might require a write on a vnode object (update pipe size) .when opening a file with O_TRUNC, upgrade vnode lock when truncating .utime used unnecessarily restrictive locking type .path parsing: .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to VMNT_READ if that was what was actually requested. This prevents the following deadlock scenario: thread A: lock_vmnt(vmp, TLL_READSER); lock_vnode(vp, TLL_READSER); upgrade_vmnt_lock(vmp, TLL_WRITE); thread B: lock_vmnt(vmp, TLL_READ); lock_vnode(vp, TLL_READSER); thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in lock_vnode. This happens when, for example, thread A tries to create a new node (open.c:new_node) and thread B tries to do eat_path to change dir (stadir.c:do_chdir). When the path is being resolved, a vnode is always locked with VNODE_OPCL (TLL_READSER) and then downgraded to VNODE_READ if read-only is actually requested. Thread A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows VMNT_READ locks. Thread B can't acquire a lock on the vnode because thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE (TLL_WRITE) because thread B has a VMNT_READ lock on it. By serializing vmnt locks during path parsing, thread B can only acquire a lock on vmp when thread A has completely finished its operation.
This commit is contained in:
parent
ecf9b40841
commit
7c8b3ddfed
12 changed files with 83 additions and 18 deletions
|
@ -325,6 +325,12 @@ tll_access_t locktype;
|
||||||
assert(filp->filp_softlock == NULL);
|
assert(filp->filp_softlock == NULL);
|
||||||
filp->filp_softlock = fp;
|
filp->filp_softlock = fp;
|
||||||
} else {
|
} else {
|
||||||
|
/* We have to make an exception for vnodes belonging to pipes. Even
|
||||||
|
* read(2) operations on pipes change the vnode and therefore require
|
||||||
|
* exclusive access.
|
||||||
|
*/
|
||||||
|
if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
|
||||||
|
locktype = VNODE_WRITE;
|
||||||
lock_vnode(vp, locktype);
|
lock_vnode(vp, locktype);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -165,7 +165,7 @@ int do_unlink()
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(vmp != NULL);
|
assert(vmp != NULL);
|
||||||
tll_upgrade(&vmp->m_lock);
|
upgrade_vmnt_lock(vmp);
|
||||||
|
|
||||||
if (job_call_nr == UNLINK)
|
if (job_call_nr == UNLINK)
|
||||||
r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
|
r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
|
||||||
|
@ -261,7 +261,7 @@ int do_rename()
|
||||||
(r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1;
|
(r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1;
|
||||||
|
|
||||||
if (r == OK) {
|
if (r == OK) {
|
||||||
tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
|
upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */
|
||||||
r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
|
r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
|
||||||
new_dirp->v_inode_nr, fullpath);
|
new_dirp->v_inode_nr, fullpath);
|
||||||
}
|
}
|
||||||
|
|
|
@ -304,7 +304,7 @@ int do_sync()
|
||||||
int r = OK;
|
int r = OK;
|
||||||
|
|
||||||
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
||||||
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
|
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
||||||
break;
|
break;
|
||||||
if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
|
if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
|
||||||
vmp->m_root_node != NULL) {
|
vmp->m_root_node != NULL) {
|
||||||
|
@ -331,19 +331,21 @@ int do_fsync()
|
||||||
|
|
||||||
if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
|
if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
|
||||||
return(err_code);
|
return(err_code);
|
||||||
|
|
||||||
dev = rfilp->filp_vno->v_dev;
|
dev = rfilp->filp_vno->v_dev;
|
||||||
|
unlock_filp(rfilp);
|
||||||
|
|
||||||
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
||||||
|
if (vmp->m_dev != dev) continue;
|
||||||
|
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
||||||
|
break;
|
||||||
if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
|
if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
|
||||||
vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
|
vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
|
||||||
|
|
||||||
if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
|
|
||||||
break;
|
|
||||||
req_sync(vmp->m_fs_e);
|
req_sync(vmp->m_fs_e);
|
||||||
|
}
|
||||||
unlock_vmnt(vmp);
|
unlock_vmnt(vmp);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
unlock_filp(rfilp);
|
|
||||||
|
|
||||||
return(r);
|
return(r);
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
|
||||||
if (oflags & O_TRUNC) {
|
if (oflags & O_TRUNC) {
|
||||||
if ((r = forbidden(fp, vp, W_BIT)) != OK)
|
if ((r = forbidden(fp, vp, W_BIT)) != OK)
|
||||||
break;
|
break;
|
||||||
|
upgrade_vnode_lock(vp);
|
||||||
truncate_vnode(vp, 0);
|
truncate_vnode(vp, 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
|
||||||
case S_IFIFO:
|
case S_IFIFO:
|
||||||
/* Create a mapped inode on PFS which handles reads
|
/* Create a mapped inode on PFS which handles reads
|
||||||
and writes to this named pipe. */
|
and writes to this named pipe. */
|
||||||
tll_upgrade(&vp->v_lock);
|
upgrade_vnode_lock(vp);
|
||||||
r = map_vnode(vp, PFS_PROC_NR);
|
r = map_vnode(vp, PFS_PROC_NR);
|
||||||
if (r == OK) {
|
if (r == OK) {
|
||||||
if (vp->v_ref_count == 1) {
|
if (vp->v_ref_count == 1) {
|
||||||
|
@ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
lock_vnode(vp, VNODE_OPCL);
|
lock_vnode(vp, VNODE_OPCL);
|
||||||
|
upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */
|
||||||
|
|
||||||
if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK ||
|
if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK ||
|
||||||
(r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
|
(r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
|
||||||
|
@ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
|
||||||
/* Can't create inode either due to permissions or some other
|
/* Can't create inode either due to permissions or some other
|
||||||
* problem. In case r is EEXIST, we might be dealing with a
|
* problem. In case r is EEXIST, we might be dealing with a
|
||||||
* dangling symlink.*/
|
* dangling symlink.*/
|
||||||
|
|
||||||
|
/* Downgrade lock to prevent deadlock during symlink resolving */
|
||||||
|
downgrade_vmnt_lock(dir_vmp);
|
||||||
|
|
||||||
if (r == EEXIST) {
|
if (r == EEXIST) {
|
||||||
struct vnode *slp, *old_wd;
|
struct vnode *slp, *old_wd;
|
||||||
|
|
||||||
|
|
||||||
/* Resolve path up to symlink */
|
/* Resolve path up to symlink */
|
||||||
findnode.l_flags = PATH_RET_SYMLINK;
|
findnode.l_flags = PATH_RET_SYMLINK;
|
||||||
findnode.l_vnode_lock = VNODE_READ;
|
findnode.l_vnode_lock = VNODE_READ;
|
||||||
|
|
|
@ -398,6 +398,7 @@ struct fproc *rfp;
|
||||||
struct vnode *dir_vp;
|
struct vnode *dir_vp;
|
||||||
struct vmnt *vmp, *vmpres;
|
struct vmnt *vmp, *vmpres;
|
||||||
struct lookup_res res;
|
struct lookup_res res;
|
||||||
|
tll_access_t mnt_lock_type;
|
||||||
|
|
||||||
assert(resolve->l_vmp);
|
assert(resolve->l_vmp);
|
||||||
assert(resolve->l_vnode);
|
assert(resolve->l_vnode);
|
||||||
|
@ -435,7 +436,12 @@ struct fproc *rfp;
|
||||||
symloop = 0; /* Number of symlinks seen so far */
|
symloop = 0; /* Number of symlinks seen so far */
|
||||||
|
|
||||||
/* Lock vmnt */
|
/* Lock vmnt */
|
||||||
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
|
if (resolve->l_vmnt_lock == VMNT_READ)
|
||||||
|
mnt_lock_type = VMNT_WRITE;
|
||||||
|
else
|
||||||
|
mnt_lock_type = resolve->l_vmnt_lock;
|
||||||
|
|
||||||
|
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
|
||||||
if (r == EBUSY) /* vmnt already locked */
|
if (r == EBUSY) /* vmnt already locked */
|
||||||
vmpres = NULL;
|
vmpres = NULL;
|
||||||
else
|
else
|
||||||
|
@ -532,7 +538,7 @@ struct fproc *rfp;
|
||||||
if (vmpres) unlock_vmnt(vmpres);
|
if (vmpres) unlock_vmnt(vmpres);
|
||||||
vmpres = find_vmnt(fs_e);
|
vmpres = find_vmnt(fs_e);
|
||||||
if (vmpres == NULL) return(EIO); /* mount point vanished? */
|
if (vmpres == NULL) return(EIO); /* mount point vanished? */
|
||||||
if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
|
if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
|
||||||
if (r == EBUSY)
|
if (r == EBUSY)
|
||||||
vmpres = NULL; /* Already locked */
|
vmpres = NULL; /* Already locked */
|
||||||
else
|
else
|
||||||
|
@ -549,6 +555,11 @@ struct fproc *rfp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) {
|
||||||
|
/* downgrade VMNT_WRITE to VMNT_READ */
|
||||||
|
downgrade_vmnt_lock(*(resolve->l_vmp));
|
||||||
|
}
|
||||||
|
|
||||||
/* Fill in response fields */
|
/* Fill in response fields */
|
||||||
result_node->inode_nr = res.inode_nr;
|
result_node->inode_nr = res.inode_nr;
|
||||||
result_node->fmode = res.fmode;
|
result_node->fmode = res.fmode;
|
||||||
|
|
|
@ -52,7 +52,7 @@ int do_pipe()
|
||||||
|
|
||||||
/* Get a lock on PFS */
|
/* Get a lock on PFS */
|
||||||
if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
|
if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
|
||||||
if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r);
|
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r);
|
||||||
|
|
||||||
/* See if a free vnode is available */
|
/* See if a free vnode is available */
|
||||||
if ((vp = get_free_vnode()) == NULL) {
|
if ((vp = get_free_vnode()) == NULL) {
|
||||||
|
|
|
@ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype);
|
||||||
void unlock_vmnt(struct vmnt *vp);
|
void unlock_vmnt(struct vmnt *vp);
|
||||||
void vmnt_unmap_by_endpt(endpoint_t proc_e);
|
void vmnt_unmap_by_endpt(endpoint_t proc_e);
|
||||||
void fetch_vmnt_paths(void);
|
void fetch_vmnt_paths(void);
|
||||||
|
void upgrade_vmnt_lock(struct vmnt *vmp);
|
||||||
|
void downgrade_vmnt_lock(struct vmnt *vmp);
|
||||||
|
|
||||||
/* vnode.c */
|
/* vnode.c */
|
||||||
void check_vnode_locks(void);
|
void check_vnode_locks(void);
|
||||||
|
@ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp);
|
||||||
void dup_vnode(struct vnode *vp);
|
void dup_vnode(struct vnode *vp);
|
||||||
void put_vnode(struct vnode *vp);
|
void put_vnode(struct vnode *vp);
|
||||||
void vnode_clean_refs(struct vnode *vp);
|
void vnode_clean_refs(struct vnode *vp);
|
||||||
|
void upgrade_vnode_lock(struct vnode *vp);
|
||||||
|
|
||||||
/* write.c */
|
/* write.c */
|
||||||
int do_write(void);
|
int do_write(void);
|
||||||
|
|
|
@ -267,7 +267,7 @@ size_t req_size;
|
||||||
u64_t position, new_pos;
|
u64_t position, new_pos;
|
||||||
|
|
||||||
/* Must make sure we're operating on locked filp and vnode */
|
/* Must make sure we're operating on locked filp and vnode */
|
||||||
assert(tll_islocked(&f->filp_vno->v_lock));
|
assert(tll_locked_by_me(&f->filp_vno->v_lock));
|
||||||
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
|
assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
|
||||||
|
|
||||||
oflags = f->filp_flags;
|
oflags = f->filp_flags;
|
||||||
|
|
|
@ -41,7 +41,7 @@ int do_utime()
|
||||||
if (len == 0) len = (size_t) job_m_in.utime_strlen;
|
if (len == 0) len = (size_t) job_m_in.utime_strlen;
|
||||||
|
|
||||||
lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
|
lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
|
||||||
resolve.l_vmnt_lock = VMNT_WRITE;
|
resolve.l_vmnt_lock = VMNT_READ;
|
||||||
resolve.l_vnode_lock = VNODE_READ;
|
resolve.l_vnode_lock = VNODE_READ;
|
||||||
|
|
||||||
/* Temporarily open the file */
|
/* Temporarily open the file */
|
||||||
|
|
|
@ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype)
|
||||||
* request queued ("write bias") or when a read-serialized lock is trying to
|
* request queued ("write bias") or when a read-serialized lock is trying to
|
||||||
* upgrade to write-only. The current lock for this tll is either read or
|
* upgrade to write-only. The current lock for this tll is either read or
|
||||||
* read-serialized. */
|
* read-serialized. */
|
||||||
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
|
if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) {
|
||||||
|
assert(!(tllp->t_status & TLL_PEND));
|
||||||
return tll_append(tllp, locktype);
|
return tll_append(tllp, locktype);
|
||||||
|
}
|
||||||
|
|
||||||
/* If this lock is in read-serialized mode, we can allow read requests and
|
/* If this lock is in read-serialized mode, we can allow read requests and
|
||||||
* queue read-serialized requests */
|
* queue read-serialized requests */
|
||||||
if (tllp->t_current == TLL_READSER) {
|
if (tllp->t_current == TLL_READSER) {
|
||||||
if (locktype == TLL_READ) {
|
if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) {
|
||||||
tllp->t_readonly++;
|
tllp->t_readonly++;
|
||||||
return(OK);
|
return(OK);
|
||||||
} else
|
} else
|
||||||
|
|
|
@ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
|
||||||
if (r == EBUSY) return(r);
|
if (r == EBUSY) return(r);
|
||||||
|
|
||||||
if (initial_locktype != locktype) {
|
if (initial_locktype != locktype) {
|
||||||
tll_upgrade(&vmp->m_lock);
|
upgrade_vmnt_lock(vmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LOCK_DEBUG
|
#if LOCK_DEBUG
|
||||||
|
@ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*===========================================================================*
|
||||||
|
* downgrade_vmnt_lock *
|
||||||
|
*===========================================================================*/
|
||||||
|
void downgrade_vmnt_lock(struct vmnt *vmp)
|
||||||
|
{
|
||||||
|
ASSERTVMP(vmp);
|
||||||
|
tll_downgrade(&vmp->m_lock);
|
||||||
|
|
||||||
|
#if LOCK_DEBUG
|
||||||
|
/* If we're no longer the owner of a lock, we downgraded to VMNT_READ */
|
||||||
|
if (!tll_locked_by_me(&vmp->m_lock)) {
|
||||||
|
fp->fp_vmnt_rdlocks++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*===========================================================================*
|
||||||
|
* upgrade_vmnt_lock *
|
||||||
|
*===========================================================================*/
|
||||||
|
void upgrade_vmnt_lock(struct vmnt *vmp)
|
||||||
|
{
|
||||||
|
ASSERTVMP(vmp);
|
||||||
|
tll_upgrade(&vmp->m_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/*===========================================================================*
|
/*===========================================================================*
|
||||||
* fetch_vmnt_paths *
|
* fetch_vmnt_paths *
|
||||||
*===========================================================================*/
|
*===========================================================================*/
|
||||||
|
|
|
@ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp)
|
||||||
tll_unlock(&vp->v_lock);
|
tll_unlock(&vp->v_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*===========================================================================*
|
||||||
|
* upgrade_vnode_lock *
|
||||||
|
*===========================================================================*/
|
||||||
|
void upgrade_vnode_lock(struct vnode *vp)
|
||||||
|
{
|
||||||
|
ASSERTVP(vp);
|
||||||
|
tll_upgrade(&vp->v_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/*===========================================================================*
|
/*===========================================================================*
|
||||||
* dup_vnode *
|
* dup_vnode *
|
||||||
*===========================================================================*/
|
*===========================================================================*/
|
||||||
|
@ -259,7 +268,7 @@ void put_vnode(struct vnode *vp)
|
||||||
|
|
||||||
/* If we already had a lock, there is a consistency problem */
|
/* If we already had a lock, there is a consistency problem */
|
||||||
assert(lock_vp != EBUSY);
|
assert(lock_vp != EBUSY);
|
||||||
tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */
|
upgrade_vnode_lock(vp); /* Acquire exclusive access */
|
||||||
|
|
||||||
/* A vnode that's not in use can't be put back. */
|
/* A vnode that's not in use can't be put back. */
|
||||||
if (vp->v_ref_count <= 0)
|
if (vp->v_ref_count <= 0)
|
||||||
|
|
Loading…
Reference in a new issue