minix/servers/vfs/vnode.c

315 lines
8.7 KiB
C
Raw Normal View History

/* This file contains the routines related to vnodes.
 * The entry points are:
 *
 *  get_vnode - increase counter and get details of an inode
 *  get_free_vnode - get a pointer to a free vnode obj
 *  find_vnode - find a vnode according to the FS endpoint and the inode num.
 *  dup_vnode - duplicate vnode (i.e. increase counter)
 *  put_vnode - drop vnode (i.e. decrease counter)
 */
#include "fs.h"
#include "vnode.h"
#include "vmnt.h"
2007-01-05 17:36:55 +01:00
#include "file.h"
#include <minix/vfsif.h>
2012-02-13 16:28:04 +01:00
#include <assert.h>
/* Is vnode pointer reasonable?
 *
 * When NDEBUG is set all three checks expand to nothing. Otherwise:
 *   SANEVP(v)   - non-zero iff v points at a slot inside the vnode table
 *   CHECKVN(v)  - reports a bad v and makes the *calling function* return 0
 *   ASSERTVP(v) - reports a bad v and panics
 * Note that SANEVP evaluates its argument twice; do not pass an expression
 * with side effects.
 */
#if NDEBUG
#define SANEVP(v)
#define CHECKVN(v)
#define ASSERTVP(v)
#else
#define SANEVP(v) ((v) >= &vnode[0] && (v) < &vnode[NR_VNODES])

/* Print the offending pointer with the file and line that detected it.
 * The %p conversion requires a void pointer, hence the cast. */
#define BADVP(v, f, l) printf("%s:%d: bad vp %p\n", f, l, (void *) (v))

/* The statement macros below are wrapped in do { } while (0) so that they
 * behave as a single statement, e.g. in 'if (x) ASSERTVP(v); else ...'. */

/* vp check that returns 0 for use in check_vrefs() */
#define CHECKVN(v) do {                                 \
        if (!SANEVP(v)) {                               \
                BADVP(v, __FILE__, __LINE__);           \
                return 0;                               \
        }                                               \
} while (0)

/* vp check that panics */
#define ASSERTVP(v) do {                                \
        if (!SANEVP(v)) {                               \
                BADVP(v, __FILE__, __LINE__);           \
                panic("bad vp");                        \
        }                                               \
} while (0)
#endif
#if LOCK_DEBUG
/*===========================================================================*
 *                              check_vnode_locks_by_me                      *
 *===========================================================================*/
void check_vnode_locks_by_me(struct fproc *rfp)
{
/* Debug-only consistency check (compiled in with LOCK_DEBUG): panic if the
 * calling thread still holds a lock on any vnode in the table, or if the
 * read-lock counter of 'rfp' is non-zero.
 */
  int slot;

  for (slot = 0; slot < NR_VNODES; slot++) {
        struct vnode *cur = &vnode[slot];

        if (!tll_locked_by_me(&cur->v_lock)) continue;

        panic("Thread %d still holds vnode lock on vp %p call_nr=%d\n",
                mthread_self(), cur, job_call_nr);
  }

  /* No per-vnode lock is held by us; the read-lock count must agree. */
  if (rfp->fp_vp_rdlocks == 0) return;

  panic("Thread %d still holds read locks on a vnode (%d) call_nr=%d\n",
        mthread_self(), rfp->fp_vp_rdlocks, job_call_nr);
}
#endif
/*===========================================================================*
* check_vnode_locks *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void check_vnode_locks()
2012-02-13 16:28:04 +01:00
{
struct vnode *vp;
int count = 0;
for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++)
if (is_vnode_locked(vp)) {
count++;
}
if (count) panic("%d locked vnodes\n", count);
#if 0
printf("check_vnode_locks OK\n");
#endif
}
/*===========================================================================*
* get_free_vnode *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
struct vnode *get_free_vnode()
{
/* Find a free vnode slot in the vnode table (it's not actually allocated) */
struct vnode *vp;
for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) {
2012-02-13 16:28:04 +01:00
if (vp->v_ref_count == 0 && !is_vnode_locked(vp)) {
vp->v_uid = -1;
vp->v_gid = -1;
vp->v_sdev = NO_DEV;
2012-02-13 16:28:04 +01:00
vp->v_mapfs_e = NONE;
vp->v_mapfs_count = 0;
vp->v_mapinode_nr = 0;
return(vp);
2007-01-05 17:36:55 +01:00
}
2012-02-13 16:28:04 +01:00
}
2007-01-05 17:36:55 +01:00
err_code = ENFILE;
return(NULL);
}
/*===========================================================================*
 *                              find_vnode                                   *
 *===========================================================================*/
struct vnode *find_vnode(int fs_e, ino_t ino)
{
/* Look up the in-use vnode that belongs to FS endpoint 'fs_e' and has inode
 * number 'ino'. Only slots with a positive reference count are considered.
 * Returns the first matching slot, or NULL when there is none.
 */
  int slot;

  for (slot = 0; slot < NR_VNODES; slot++) {
        struct vnode *cand = &vnode[slot];

        if (cand->v_ref_count <= 0) continue;   /* slot not in use */
        if (cand->v_fs_e != fs_e) continue;
        if (cand->v_inode_nr != ino) continue;

        return(cand);
  }

  return(NULL);
}
/*===========================================================================*
* is_vnode_locked *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
int is_vnode_locked(struct vnode *vp)
2012-02-13 16:28:04 +01:00
{
/* Find out whether a thread holds a lock on this vnode or is trying to obtain
* a lock. */
ASSERTVP(vp);
return(tll_islocked(&vp->v_lock) || tll_haspendinglock(&vp->v_lock));
}
/*===========================================================================*
* init_vnodes *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void init_vnodes(void)
2012-02-13 16:28:04 +01:00
{
struct vnode *vp;
for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp) {
vp->v_fs_e = NONE;
vp->v_mapfs_e = NONE;
vp->v_inode_nr = 0;
vp->v_ref_count = 0;
vp->v_fs_count = 0;
vp->v_mapfs_count = 0;
tll_init(&vp->v_lock);
}
}
/*===========================================================================*
* lock_vnode *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
int lock_vnode(struct vnode *vp, tll_access_t locktype)
2012-02-13 16:28:04 +01:00
{
int r;
ASSERTVP(vp);
r = tll_lock(&vp->v_lock, locktype);
#if LOCK_DEBUG
if (locktype == VNODE_READ) {
fp->fp_vp_rdlocks++;
}
#endif
if (r == EBUSY) return(r);
return(OK);
}
/*===========================================================================*
* unlock_vnode *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void unlock_vnode(struct vnode *vp)
2012-02-13 16:28:04 +01:00
{
#if LOCK_DEBUG
int i;
register struct vnode *rvp;
struct worker_thread *w;
#endif
ASSERTVP(vp);
#if LOCK_DEBUG
/* Decrease read-only lock counter when not locked as VNODE_OPCL or
* VNODE_WRITE */
if (!tll_locked_by_me(&vp->v_lock)) {
fp->fp_vp_rdlocks--;
}
for (i = 0; i < NR_VNODES; i++) {
rvp = &vnode[i];
w = rvp->v_lock.t_write;
assert(w != self);
while (w && w->w_next != NULL) {
w = w->w_next;
assert(w != self);
}
w = rvp->v_lock.t_serial;
assert(w != self);
while (w && w->w_next != NULL) {
w = w->w_next;
assert(w != self);
}
}
#endif
tll_unlock(&vp->v_lock);
}
VFS: fix locking bugs .sync and fsync used unnecessarily restrictive locking type .fsync violated locking order by obtaining a vmnt lock after a filp lock .fsync contained a TOCTOU bug .new_node violated locking rules (didn't upgrade lock upon file creation) .do_pipe used unnecessarily restrictive locking type .always lock pipes exclusively; even a read operation might require to do a write on a vnode object (update pipe size) .when opening a file with O_TRUNC, upgrade vnode lock when truncating .utime used unnecessarily restrictive locking type .path parsing: .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to VMNT_READ if that was what was actually requested. This prevents the following deadlock scenario: thread A: lock_vmnt(vmp, TLL_READSER); lock_vnode(vp, TLL_READSER); upgrade_vmnt_lock(vmp, TLL_WRITE); thread B: lock_vmnt(vmp, TLL_READ); lock_vnode(vp, TLL_READSER); thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in lock_vnode. This happens when, for example, thread A tries create a new node (open.c:new_node) and thread B tries to do eat_path to change dir (stadir.c:do_chdir). When the path is being resolved, a vnode is always locked with VNODE_OPCL (TLL_READSER) and then downgraded to VNODE_READ if read-only is actually requested. Thread A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows VMNT_READ locks. Thread B can't acquire a lock on the vnode because thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE (TLL_WRITE) because thread B has a VMNT_READ lock on it. By serializing vmnt locks during path parsing, thread B can only acquire a lock on vmp when thread A has completely finished its operation.
2012-11-30 13:49:53 +01:00
/*===========================================================================*
 *                              upgrade_vnode_lock                           *
 *===========================================================================*/
void upgrade_vnode_lock(struct vnode *vp)
{
/* Upgrade the lock held on 'vp' by handing it to tll_upgrade(). The vnode
 * pointer is sanity checked first.
 */
  ASSERTVP(vp);

  tll_upgrade(&vp->v_lock);
}
/*===========================================================================*
* dup_vnode *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void dup_vnode(struct vnode *vp)
{
/* dup_vnode() is called to increment the vnode and therefore the
* referred inode's counter.
*/
ASSERTVP(vp);
2007-01-05 17:36:55 +01:00
vp->v_ref_count++;
}
/*===========================================================================*
* put_vnode *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void put_vnode(struct vnode *vp)
{
2012-02-13 16:28:04 +01:00
/* Decrease vnode's usage counter and decrease inode's usage counter in the
* corresponding FS process. Decreasing the fs_count each time we decrease the
* ref count would lead to poor performance. Instead, only decrease fs_count
* when the ref count hits zero. However, this could lead to fs_count to wrap.
* To prevent this, we drop the counter to 1 when the counter hits 256.
* We maintain fs_count as a sanity check to make sure VFS and the FS are in
* sync.
*/
2012-02-13 16:28:04 +01:00
int r, lock_vp;
ASSERTVP(vp);
2012-02-13 16:28:04 +01:00
/* Lock vnode. It's quite possible this thread already has a lock on this
* vnode. That's no problem, because the reference counter will not decrease
* to zero in that case. However, if the counter does decrease to zero *and*
* is already locked, we have a consistency problem somewhere. */
lock_vp = lock_vnode(vp, VNODE_OPCL);
if (vp->v_ref_count > 1) {
2007-01-05 17:36:55 +01:00
/* Decrease counter */
vp->v_ref_count--;
2012-02-13 16:28:04 +01:00
if (vp->v_fs_count > 256)
2007-01-05 17:36:55 +01:00
vnode_clean_refs(vp);
2012-02-13 16:28:04 +01:00
if (lock_vp != EBUSY) unlock_vnode(vp);
2007-01-05 17:36:55 +01:00
return;
}
2012-02-13 16:28:04 +01:00
/* If we already had a lock, there is a consistency problem */
assert(lock_vp != EBUSY);
VFS: fix locking bugs .sync and fsync used unnecessarily restrictive locking type .fsync violated locking order by obtaining a vmnt lock after a filp lock .fsync contained a TOCTOU bug .new_node violated locking rules (didn't upgrade lock upon file creation) .do_pipe used unnecessarily restrictive locking type .always lock pipes exclusively; even a read operation might require to do a write on a vnode object (update pipe size) .when opening a file with O_TRUNC, upgrade vnode lock when truncating .utime used unnecessarily restrictive locking type .path parsing: .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to VMNT_READ if that was what was actually requested. This prevents the following deadlock scenario: thread A: lock_vmnt(vmp, TLL_READSER); lock_vnode(vp, TLL_READSER); upgrade_vmnt_lock(vmp, TLL_WRITE); thread B: lock_vmnt(vmp, TLL_READ); lock_vnode(vp, TLL_READSER); thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in lock_vnode. This happens when, for example, thread A tries create a new node (open.c:new_node) and thread B tries to do eat_path to change dir (stadir.c:do_chdir). When the path is being resolved, a vnode is always locked with VNODE_OPCL (TLL_READSER) and then downgraded to VNODE_READ if read-only is actually requested. Thread A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows VMNT_READ locks. Thread B can't acquire a lock on the vnode because thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE (TLL_WRITE) because thread B has a VMNT_READ lock on it. By serializing vmnt locks during path parsing, thread B can only acquire a lock on vmp when thread A has completely finished its operation.
2012-11-30 13:49:53 +01:00
upgrade_vnode_lock(vp); /* Acquire exclusive access */
2012-02-13 16:28:04 +01:00
/* A vnode that's not in use can't be put back. */
if (vp->v_ref_count <= 0)
panic("put_vnode failed: bad v_ref_count %d\n", vp->v_ref_count);
/* fs_count should indicate that the file is in use. */
2012-02-13 16:28:04 +01:00
if (vp->v_fs_count <= 0)
panic("put_vnode failed: bad v_fs_count %d\n", vp->v_fs_count);
2007-01-05 17:36:55 +01:00
/* Tell FS we don't need this inode to be open anymore. */
2012-02-13 16:28:04 +01:00
r = req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count);
2007-01-05 17:36:55 +01:00
2012-02-13 16:28:04 +01:00
if (r != OK) {
printf("VFS: putnode failed: %d\n", r);
util_stacktrace();
}
2012-02-13 16:28:04 +01:00
/* This inode could've been mapped. If so, tell mapped FS to close it as
* well. If mapped onto same FS, this putnode is not needed. */
if (vp->v_mapfs_e != NONE && vp->v_mapfs_e != vp->v_fs_e)
req_putnode(vp->v_mapfs_e, vp->v_mapinode_nr, vp->v_mapfs_count);
vp->v_fs_count = 0;
vp->v_ref_count = 0;
vp->v_mapfs_count = 0;
2012-02-13 16:28:04 +01:00
unlock_vnode(vp);
}
2007-01-05 17:36:55 +01:00
/*===========================================================================*
* vnode_clean_refs *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void vnode_clean_refs(struct vnode *vp)
2007-01-05 17:36:55 +01:00
{
/* Tell the underlying FS to drop all reference but one. */
if (vp == NULL) return;
if (vp->v_fs_count <= 1) return; /* Nothing to do */
2007-01-05 17:36:55 +01:00
/* Drop all references except one */
req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count - 1);
vp->v_fs_count = 1;
2007-01-05 17:36:55 +01:00
}