From 7c8b3ddfedbf67ace632beb1aade5bd94f21f792 Mon Sep 17 00:00:00 2001 From: Thomas Veerman Date: Fri, 30 Nov 2012 12:49:53 +0000 Subject: [PATCH] VFS: fix locking bugs .sync and fsync used unnecessarily restrictive locking type .fsync violated locking order by obtaining a vmnt lock after a filp lock .fsync contained a TOCTOU bug .new_node violated locking rules (didn't upgrade lock upon file creation) .do_pipe used unnecessarily restrictive locking type .always lock pipes exclusively; even a read operation might require to do a write on a vnode object (update pipe size) .when opening a file with O_TRUNC, upgrade vnode lock when truncating .utime used unnecessarily restrictive locking type .path parsing: .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to VMNT_READ if that was what was actually requested. This prevents the following deadlock scenario: thread A: lock_vmnt(vmp, TLL_READSER); lock_vnode(vp, TLL_READSER); upgrade_vmnt_lock(vmp, TLL_WRITE); thread B: lock_vmnt(vmp, TLL_READ); lock_vnode(vp, TLL_READSER); thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in lock_vnode. This happens when, for example, thread A tries create a new node (open.c:new_node) and thread B tries to do eat_path to change dir (stadir.c:do_chdir). When the path is being resolved, a vnode is always locked with VNODE_OPCL (TLL_READSER) and then downgraded to VNODE_READ if read-only is actually requested. Thread A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows VMNT_READ locks. Thread B can't acquire a lock on the vnode because thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE (TLL_WRITE) because thread B has a VMNT_READ lock on it. By serializing vmnt locks during path parsing, thread B can only acquire a lock on vmp when thread A has completely finished its operation. --- servers/vfs/filedes.c | 6 ++++++ servers/vfs/link.c | 4 ++-- servers/vfs/misc.c | 14 ++++++++------ servers/vfs/open.c | 9 ++++++++- servers/vfs/path.c | 15 +++++++++++++-- servers/vfs/pipe.c | 2 +- servers/vfs/proto.h | 3 +++ servers/vfs/read.c | 2 +- servers/vfs/time.c | 2 +- servers/vfs/tll.c | 6 ++++-- servers/vfs/vmnt.c | 27 ++++++++++++++++++++++++++- servers/vfs/vnode.c | 11 ++++++++++- 12 files changed, 83 insertions(+), 18 deletions(-) diff --git a/servers/vfs/filedes.c b/servers/vfs/filedes.c index b51642b2c..b0a19911e 100644 --- a/servers/vfs/filedes.c +++ b/servers/vfs/filedes.c @@ -325,6 +325,12 @@ tll_access_t locktype; assert(filp->filp_softlock == NULL); filp->filp_softlock = fp; } else { + /* We have to make an exception for vnodes belonging to pipes. Even + * read(2) operations on pipes change the vnode and therefore require + * exclusive access. + */ + if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ) + locktype = VNODE_WRITE; lock_vnode(vp, locktype); } diff --git a/servers/vfs/link.c b/servers/vfs/link.c index 6a025ccc7..c79f33f3a 100644 --- a/servers/vfs/link.c +++ b/servers/vfs/link.c @@ -165,7 +165,7 @@ int do_unlink() } assert(vmp != NULL); - tll_upgrade(&vmp->m_lock); + upgrade_vmnt_lock(vmp); if (job_call_nr == UNLINK) r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath); @@ -261,7 +261,7 @@ int do_rename() (r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1; if (r == OK) { - tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */ + upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */ r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, new_dirp->v_inode_nr, fullpath); } diff --git a/servers/vfs/misc.c b/servers/vfs/misc.c index a52abe05f..03de68ca4 100644 --- a/servers/vfs/misc.c +++ b/servers/vfs/misc.c @@ -304,7 +304,7 @@ int do_sync() int r = OK; for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { - if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK) + if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) break; if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE && vmp->m_root_node != NULL) { @@ -331,20 +331,22 @@ int do_fsync() if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL) return(err_code); + dev = rfilp->filp_vno->v_dev; + unlock_filp(rfilp); + for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { + if (vmp->m_dev != dev) continue; + if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) + break; if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && vmp->m_fs_e != NONE && vmp->m_root_node != NULL) { - if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK) - break; req_sync(vmp->m_fs_e); - unlock_vmnt(vmp); } + unlock_vmnt(vmp); } - unlock_filp(rfilp); - return(r); } diff --git a/servers/vfs/open.c b/servers/vfs/open.c index bd945371c..0bc7690ff 100644 --- a/servers/vfs/open.c +++ b/servers/vfs/open.c @@ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode) if (oflags & O_TRUNC) { if ((r = forbidden(fp, vp, W_BIT)) != OK) break; + upgrade_vnode_lock(vp); truncate_vnode(vp, 0); } break; @@ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode) case S_IFIFO: /* Create a mapped inode on PFS which handles reads and writes to this named pipe. */ - tll_upgrade(&vp->v_lock); + upgrade_vnode_lock(vp); r = map_vnode(vp, PFS_PROC_NR); if (r == OK) { if (vp->v_ref_count == 1) { @@ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) } lock_vnode(vp, VNODE_OPCL); + upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */ if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK || (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid, @@ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) /* Can't create inode either due to permissions or some other * problem. In case r is EEXIST, we might be dealing with a * dangling symlink.*/ + + /* Downgrade lock to prevent deadlock during symlink resolving*/ + downgrade_vmnt_lock(dir_vmp); + if (r == EEXIST) { struct vnode *slp, *old_wd; + /* Resolve path up to symlink */ findnode.l_flags = PATH_RET_SYMLINK; findnode.l_vnode_lock = VNODE_READ; diff --git a/servers/vfs/path.c b/servers/vfs/path.c index ba1d81089..aeedbe50d 100644 --- a/servers/vfs/path.c +++ b/servers/vfs/path.c @@ -398,6 +398,7 @@ struct fproc *rfp; struct vnode *dir_vp; struct vmnt *vmp, *vmpres; struct lookup_res res; + tll_access_t mnt_lock_type; assert(resolve->l_vmp); assert(resolve->l_vnode); @@ -435,7 +436,12 @@ struct fproc *rfp; symloop = 0; /* Number of symlinks seen so far */ /* Lock vmnt */ - if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { + if (resolve->l_vmnt_lock == VMNT_READ) + mnt_lock_type = VMNT_WRITE; + else + mnt_lock_type = resolve->l_vmnt_lock; + + if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) { if (r == EBUSY) /* vmnt already locked */ vmpres = NULL; else @@ -532,7 +538,7 @@ struct fproc *rfp; if (vmpres) unlock_vmnt(vmpres); vmpres = find_vmnt(fs_e); if (vmpres == NULL) return(EIO); /* mount point vanished? */ - if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { + if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) { if (r == EBUSY) vmpres = NULL; /* Already locked */ else @@ -549,6 +555,11 @@ struct fproc *rfp; } } + if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) { + /* downgrade VMNT_WRITE to VMNT_READ */ + downgrade_vmnt_lock(*(resolve->l_vmp)); + } + /* Fill in response fields */ result_node->inode_nr = res.inode_nr; result_node->fmode = res.fmode; diff --git a/servers/vfs/pipe.c b/servers/vfs/pipe.c index 13a359a9b..494f229df 100644 --- a/servers/vfs/pipe.c +++ b/servers/vfs/pipe.c @@ -52,7 +52,7 @@ int do_pipe() /* Get a lock on PFS */ if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone"); - if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r); + if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r); /* See if a free vnode is available */ if ((vp = get_free_vnode()) == NULL) { diff --git a/servers/vfs/proto.h b/servers/vfs/proto.h index d4277332f..4b1f2f220 100644 --- a/servers/vfs/proto.h +++ b/servers/vfs/proto.h @@ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype); void unlock_vmnt(struct vmnt *vp); void vmnt_unmap_by_endpt(endpoint_t proc_e); void fetch_vmnt_paths(void); +void upgrade_vmnt_lock(struct vmnt *vmp); +void downgrade_vmnt_lock(struct vmnt *vmp); /* vnode.c */ void check_vnode_locks(void); @@ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp); void dup_vnode(struct vnode *vp); void put_vnode(struct vnode *vp); void vnode_clean_refs(struct vnode *vp); +void upgrade_vnode_lock(struct vnode *vp); /* write.c */ int do_write(void); diff --git a/servers/vfs/read.c b/servers/vfs/read.c index db6a65558..59cbd2b7a 100644 --- a/servers/vfs/read.c +++ b/servers/vfs/read.c @@ -267,7 +267,7 @@ size_t req_size; u64_t position, new_pos; /* Must make sure we're operating on locked filp and vnode */ - assert(tll_islocked(&f->filp_vno->v_lock)); + assert(tll_locked_by_me(&f->filp_vno->v_lock)); assert(mutex_trylock(&f->filp_lock) == -EDEADLK); oflags = f->filp_flags; diff --git a/servers/vfs/time.c b/servers/vfs/time.c index dffbded57..54d71f2e8 100644 --- a/servers/vfs/time.c +++ b/servers/vfs/time.c @@ -41,7 +41,7 @@ int do_utime() if (len == 0) len = (size_t) job_m_in.utime_strlen; lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); - resolve.l_vmnt_lock = VMNT_WRITE; + resolve.l_vmnt_lock = VMNT_READ; resolve.l_vnode_lock = VNODE_READ; /* Temporarily open the file */ diff --git a/servers/vfs/tll.c b/servers/vfs/tll.c index 68843dd47..93c2ea7fa 100644 --- a/servers/vfs/tll.c +++ b/servers/vfs/tll.c @@ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype) * request queued ("write bias") or when a read-serialized lock is trying to * upgrade to write-only. The current lock for this tll is either read or * read-serialized. */ - if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) + if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) { + assert(!(tllp->t_status & TLL_PEND)); return tll_append(tllp, locktype); + } /* If this lock is in read-serialized mode, we can allow read requests and * queue read-serialized requests */ if (tllp->t_current == TLL_READSER) { - if (locktype == TLL_READ) { + if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) { tllp->t_readonly++; return(OK); } else diff --git a/servers/vfs/vmnt.c b/servers/vfs/vmnt.c index 6a074fb99..00e9a7dc0 100644 --- a/servers/vfs/vmnt.c +++ b/servers/vfs/vmnt.c @@ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype) if (r == EBUSY) return(r); if (initial_locktype != locktype) { - tll_upgrade(&vmp->m_lock); + upgrade_vmnt_lock(vmp); } #if LOCK_DEBUG @@ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp) } +/*===========================================================================* + * downgrade_vmnt_lock * + *===========================================================================*/ +void downgrade_vmnt_lock(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + tll_downgrade(&vmp->m_lock); + +#if LOCK_DEBUG + /* If we're no longer the owner of a lock, we downgraded to VMNT_READ */ + if (!tll_locked_by_me(&vmp->m_lock)) { + fp->fp_vmnt_rdlocks++; + } +#endif +} + +/*===========================================================================* + * upgrade_vmnt_lock * + *===========================================================================*/ +void upgrade_vmnt_lock(struct vmnt *vmp) +{ + ASSERTVMP(vmp); + tll_upgrade(&vmp->m_lock); +} + /*===========================================================================* * fetch_vmnt_paths * *===========================================================================*/ diff --git a/servers/vfs/vnode.c b/servers/vfs/vnode.c index 7d8b4dc1d..022711748 100644 --- a/servers/vfs/vnode.c +++ b/servers/vfs/vnode.c @@ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp) tll_unlock(&vp->v_lock); } +/*===========================================================================* + * vnode * + *===========================================================================*/ +void upgrade_vnode_lock(struct vnode *vp) +{ + ASSERTVP(vp); + tll_upgrade(&vp->v_lock); +} + /*===========================================================================* * dup_vnode * *===========================================================================*/ @@ -259,7 +268,7 @@ void put_vnode(struct vnode *vp) /* If we already had a lock, there is a consistency problem */ assert(lock_vp != EBUSY); - tll_upgrade(&vp->v_lock); /* Make sure nobody else accesses this vnode */ + upgrade_vnode_lock(vp); /* Acquire exclusive access */ /* A vnode that's not in use can't be put back. */ if (vp->v_ref_count <= 0)