minix/servers/vfs/path.c

878 lines
25 KiB
C
Raw Normal View History

2012-02-13 16:28:04 +01:00
/* lookup() is the main routine that controls the path name lookup. It
* handles mountpoints and symbolic links. The actual lookup requests
* are sent through the req_lookup wrapper function.
*/
#include "fs.h"
#include <string.h>
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/const.h>
#include <minix/endpoint.h>
#include <stddef.h>
#include <unistd.h>
#include <assert.h>
#include <minix/vfsif.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <dirent.h>
#include "vmnt.h"
#include "vnode.h"
2012-02-13 16:28:04 +01:00
#include "path.h"
#include "param.h"
/* Set the following define to 1 if you really want to use the POSIX definition
* (IEEE Std 1003.1, 2004) of pathname resolution. POSIX requires pathnames
* with a trailing slash (and that do not entirely consist of slash characters)
* to be treated as if a single dot is appended. This means that for example
* mkdir("dir/", ...) and rmdir("dir/") will fail because the call tries to
* create or remove the directory '.'. Historically, Unix systems just ignore
* trailing slashes.
*/
#define DO_POSIX_PATHNAME_RES 0
2012-03-25 20:25:53 +02:00
static int lookup(struct vnode *dirp, struct lookup *resolve,
node_details_t *node, struct fproc *rfp);
2012-03-25 20:25:53 +02:00
static int check_perms(endpoint_t ep, cp_grant_id_t io_gr, size_t
pathlen);
2007-08-07 14:52:47 +02:00
/*===========================================================================*
* advance *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
struct vnode *advance(dirp, resolve, rfp)
struct vnode *dirp;
2012-02-13 16:28:04 +01:00
struct lookup *resolve;
struct fproc *rfp;
{
2012-02-13 16:28:04 +01:00
/* Resolve a path name starting at dirp to a vnode. */
int r;
2012-02-13 16:28:04 +01:00
int do_downgrade = 1;
2007-08-07 14:52:47 +02:00
struct vnode *new_vp, *vp;
struct vmnt *vmp;
struct node_details res = {0,0,0,0,0,0,0};
2012-02-13 16:28:04 +01:00
tll_access_t initial_locktype;
assert(dirp);
2012-02-13 16:28:04 +01:00
assert(resolve->l_vnode_lock != TLL_NONE);
assert(resolve->l_vmnt_lock != TLL_NONE);
if (resolve->l_vnode_lock == VNODE_READ)
initial_locktype = VNODE_OPCL;
else
initial_locktype = resolve->l_vnode_lock;
/* Get a free vnode and lock it */
if ((new_vp = get_free_vnode()) == NULL) return(NULL);
lock_vnode(new_vp, initial_locktype);
/* Lookup vnode belonging to the file. */
2012-02-13 16:28:04 +01:00
if ((r = lookup(dirp, resolve, &res, rfp)) != OK) {
err_code = r;
2012-02-13 16:28:04 +01:00
unlock_vnode(new_vp);
return(NULL);
2007-08-07 14:52:47 +02:00
}
2012-02-13 16:28:04 +01:00
/* Check whether we already have a vnode for that file */
if ((vp = find_vnode(res.fs_e, res.inode_nr)) != NULL) {
2012-02-13 16:28:04 +01:00
unlock_vnode(new_vp); /* Don't need this anymore */
do_downgrade = (lock_vnode(vp, initial_locktype) != EBUSY);
/* Unfortunately, by the time we get the lock, another thread might've
* rid of the vnode (e.g., find_vnode found the vnode while a
* req_putnode was being processed). */
if (vp->v_ref_count == 0) { /* vnode vanished! */
/* As the lookup before increased the usage counters in the FS,
* we can simply set the usage counters to 1 and proceed as
* normal, because the putnode resulted in a use count of 1 in
* the FS. Other data is still valid, because the vnode was
* marked as pending lock, so get_free_vnode hasn't
* reinitialized the vnode yet. */
vp->v_fs_count = 1;
if (vp->v_mapfs_e != NONE) vp->v_mapfs_count = 1;
} else {
vp->v_fs_count++; /* We got a reference from the FS */
}
} else {
/* Vnode not found, fill in the free vnode's fields */
new_vp->v_fs_e = res.fs_e;
new_vp->v_inode_nr = res.inode_nr;
new_vp->v_mode = res.fmode;
new_vp->v_size = res.fsize;
new_vp->v_uid = res.uid;
new_vp->v_gid = res.gid;
new_vp->v_sdev = res.dev;
if( (vmp = find_vmnt(new_vp->v_fs_e)) == NULL)
panic("advance: vmnt not found");
new_vp->v_vmnt = vmp;
new_vp->v_dev = vmp->m_dev;
new_vp->v_fs_count = 1;
vp = new_vp;
2007-08-07 14:52:47 +02:00
}
2012-02-13 16:28:04 +01:00
dup_vnode(vp);
if (do_downgrade) {
/* Only downgrade a lock if we managed to lock it in the first place */
*(resolve->l_vnode) = vp;
if (initial_locktype != resolve->l_vnode_lock)
tll_downgrade(&vp->v_lock);
#if LOCK_DEBUG
if (resolve->l_vnode_lock == VNODE_READ)
fp->fp_vp_rdlocks++;
#endif
}
2012-02-13 16:28:04 +01:00
return(vp);
}
2007-08-07 14:52:47 +02:00
/*===========================================================================*
* eat_path *
2007-08-07 14:52:47 +02:00
*===========================================================================*/
2012-03-25 20:25:53 +02:00
struct vnode *eat_path(resolve, rfp)
2012-02-13 16:28:04 +01:00
struct lookup *resolve;
struct fproc *rfp;
2007-08-07 14:52:47 +02:00
{
2012-02-13 16:28:04 +01:00
/* Resolve path to a vnode. advance does the actual work. */
struct vnode *start_dir;
2012-02-13 16:28:04 +01:00
start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd);
return advance(start_dir, resolve, rfp);
2007-01-05 17:36:55 +01:00
}
/*===========================================================================*
* last_dir *
2007-01-05 17:36:55 +01:00
*===========================================================================*/
2012-03-25 20:25:53 +02:00
struct vnode *last_dir(resolve, rfp)
2012-02-13 16:28:04 +01:00
struct lookup *resolve;
struct fproc *rfp;
2007-01-05 17:36:55 +01:00
{
2012-02-13 16:28:04 +01:00
/* Parse a path, as far as the last directory, fetch the vnode
* for the last directory into the vnode table, and return a pointer to the
* vnode. In addition, return the final component of the path in 'string'. If
* the last directory can't be opened, return NULL and the reason for
* failure in 'err_code'. We can't parse component by component as that would
* be too expensive. Alternatively, we cut off the last component of the path,
* and parse the path up to the penultimate component.
2012-02-13 16:28:04 +01:00
*/
size_t len;
char *cp;
char dir_entry[NAME_MAX+1];
struct vnode *start_dir, *res_vp, *sym_vp, *sym_vp_l, *loop_start;
2012-02-13 16:28:04 +01:00
struct vmnt *sym_vmp = NULL;
int r, symloop = 0, ret_on_symlink = 0;
struct lookup symlink;
2007-08-07 14:52:47 +02:00
2012-02-13 16:28:04 +01:00
*resolve->l_vnode = NULL;
*resolve->l_vmp = NULL;
loop_start = NULL;
sym_vp = NULL;
2007-08-07 14:52:47 +02:00
2012-02-13 16:28:04 +01:00
ret_on_symlink = !!(resolve->l_flags & PATH_RET_SYMLINK);
do {
/* Is the path absolute or relative? Initialize 'start_dir'
* accordingly. Use loop_start in case we're looping.
*/
if (loop_start != NULL)
start_dir = loop_start;
else
start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd:rfp->fp_wd);
len = strlen(resolve->l_path);
/* If path is empty, return ENOENT. */
if (len == 0) {
err_code = ENOENT;
res_vp = NULL;
break;
}
2007-08-07 14:52:47 +02:00
#if !DO_POSIX_PATHNAME_RES
2012-02-13 16:28:04 +01:00
/* Remove trailing slashes */
while (len > 1 && resolve->l_path[len-1] == '/') {
len--;
resolve->l_path[len]= '\0';
}
#endif
2007-01-05 17:36:55 +01:00
2012-02-13 16:28:04 +01:00
cp = strrchr(resolve->l_path, '/');
if (cp == NULL) {
/* Just an entry in the current working directory. Prepend
* "./" in front of the path and resolve it.
*/
if (strlcpy(dir_entry, resolve->l_path, NAME_MAX+1) >= NAME_MAX + 1) {
err_code = ENAMETOOLONG;
res_vp = NULL;
break;
}
dir_entry[NAME_MAX] = '\0';
resolve->l_path[0] = '.';
resolve->l_path[1] = '\0';
2012-02-13 16:28:04 +01:00
} else if (cp[1] == '\0') {
/* Path ends in a slash. The directory entry is '.' */
2012-07-13 18:08:06 +02:00
strlcpy(dir_entry, ".", NAME_MAX+1);
2012-02-13 16:28:04 +01:00
} else {
/* A path name for the directory and a directory entry */
if (strlcpy(dir_entry, cp+1, NAME_MAX+1) >= NAME_MAX + 1) {
err_code = ENAMETOOLONG;
res_vp = NULL;
break;
}
2012-02-13 16:28:04 +01:00
cp[1] = '\0';
dir_entry[NAME_MAX] = '\0';
}
/* Remove trailing slashes */
while (cp > resolve->l_path && cp[0] == '/') {
cp[0]= '\0';
cp--;
}
/* Resolve up to and including the last directory of the path. Turn off
* PATH_RET_SYMLINK, because we do want to follow the symlink in this
* case. That is, the flag is meant for the actual filename of the path,
* not the last directory.
*/
resolve->l_flags &= ~PATH_RET_SYMLINK;
if ((res_vp = advance(start_dir, resolve, rfp)) == NULL) {
break;
}
/* If the directory entry is not a symlink we're done now. If it is a
* symlink, then we're not at the last directory, yet. */
/* Copy the directory entry back to user_fullpath */
2012-07-13 18:08:06 +02:00
strlcpy(resolve->l_path, dir_entry, NAME_MAX + 1);
2012-02-13 16:28:04 +01:00
/* Look up the directory entry, but do not follow the symlink when it
* is one. Note: depending on the previous advance, we might not be
* able to lock the resulting vnode. For example, when we look up "./."
* and request a VNODE_WRITE lock on the result, then the previous
* advance has "./" locked. The next advance to "." will try to lock
* the same vnode with a VNODE_READ lock, and fail. When that happens,
* sym_vp_l will be NULL and we must not unlock the vnode. If we would
* unlock, we actually unlock the vnode locked by the previous advance.
2012-02-13 16:28:04 +01:00
*/
lookup_init(&symlink, resolve->l_path,
resolve->l_flags|PATH_RET_SYMLINK, &sym_vmp, &sym_vp_l);
2012-02-13 16:28:04 +01:00
symlink.l_vmnt_lock = VMNT_READ;
symlink.l_vnode_lock = VNODE_READ;
2012-02-13 16:28:04 +01:00
sym_vp = advance(res_vp, &symlink, rfp);
if (sym_vp == NULL) break;
if (S_ISLNK(sym_vp->v_mode)) {
2012-02-13 16:28:04 +01:00
/* Last component is a symlink, but if we've been asked to not
* resolve it, return now.
*/
if (ret_on_symlink) {
break;
}
r = req_rdlink(sym_vp->v_fs_e, sym_vp->v_inode_nr, NONE,
(vir_bytes) resolve->l_path, PATH_MAX - 1, 1);
2012-02-13 16:28:04 +01:00
if (r < 0) {
/* Failed to read link */
err_code = r;
unlock_vnode(res_vp);
unlock_vmnt(*resolve->l_vmp);
put_vnode(res_vp);
*resolve->l_vmp = NULL;
*resolve->l_vnode = NULL;
res_vp = NULL;
break;
}
resolve->l_path[r] = '\0';
if (strrchr(resolve->l_path, '/') != NULL) {
if (sym_vp_l != NULL)
unlock_vnode(sym_vp);
2012-02-13 16:28:04 +01:00
unlock_vmnt(*resolve->l_vmp);
if (sym_vmp != NULL)
unlock_vmnt(sym_vmp);
*resolve->l_vmp = NULL;
put_vnode(sym_vp);
sym_vp = NULL;
symloop++;
/* Relative symlinks are relative to res_vp, not cwd */
if (resolve->l_path[0] != '/') {
loop_start = res_vp;
} else {
/* Absolute symlink, forget about res_vp */
unlock_vnode(res_vp);
put_vnode(res_vp);
}
continue;
}
} else {
symloop = 0; /* Not a symlink, so restart counting */
/* If we're crossing a mount point, return root node of mount
* point on which the file resides. That's the 'real' last
* dir that holds the file we're looking for.
*/
if (sym_vp->v_fs_e != res_vp->v_fs_e) {
assert(sym_vmp != NULL);
/* Unlock final file, it might have wrong lock types */
if (sym_vp_l != NULL)
unlock_vnode(sym_vp);
unlock_vmnt(sym_vmp);
put_vnode(sym_vp);
sym_vp = NULL;
/* Also unlock and release erroneous result */
unlock_vnode(*resolve->l_vnode);
unlock_vmnt(*resolve->l_vmp);
put_vnode(res_vp);
/* Relock vmnt and vnode with correct lock types */
lock_vmnt(sym_vmp, resolve->l_vmnt_lock);
lock_vnode(sym_vmp->m_root_node, resolve->l_vnode_lock);
res_vp = sym_vmp->m_root_node;
dup_vnode(res_vp);
*resolve->l_vnode = res_vp;
*resolve->l_vmp = sym_vmp;
/* We've effectively resolved the final component, so
* change it to current directory to prevent future
* 'advances' of returning erroneous results.
*/
strlcpy(dir_entry, ".", NAME_MAX+1);
}
2012-02-13 16:28:04 +01:00
}
break;
} while (symloop < SYMLOOP_MAX);
if (symloop >= SYMLOOP_MAX) {
err_code = ELOOP;
res_vp = NULL;
}
2007-08-07 14:52:47 +02:00
2012-02-13 16:28:04 +01:00
if (sym_vp != NULL) {
if (sym_vp_l != NULL) {
unlock_vnode(sym_vp);
}
2012-02-13 16:28:04 +01:00
if (sym_vmp != NULL) {
unlock_vmnt(sym_vmp);
}
put_vnode(sym_vp);
}
2012-02-13 16:28:04 +01:00
if (loop_start != NULL) {
unlock_vnode(loop_start);
put_vnode(loop_start);
}
/* Copy the directory entry back to user_fullpath */
2012-07-13 18:08:06 +02:00
strlcpy(resolve->l_path, dir_entry, NAME_MAX + 1);
/* Turn PATH_RET_SYMLINK flag back on if it was on */
if (ret_on_symlink) resolve->l_flags |= PATH_RET_SYMLINK;
2012-02-13 16:28:04 +01:00
return(res_vp);
2007-08-07 14:52:47 +02:00
}
2007-01-05 17:36:55 +01:00
2007-08-07 14:52:47 +02:00
/*===========================================================================*
* lookup *
2007-08-07 14:52:47 +02:00
*===========================================================================*/
2012-03-25 20:25:53 +02:00
/* Resolve the path name in 'resolve' relative to 'start_node'.
 *
 *   start_node   vnode of the directory where resolution starts
 *   resolve      lookup descriptor; l_path is consumed/rewritten as mount
 *                points and symlinks are crossed, *(l_vmp) receives the
 *                vmnt that was locked here (NULL if none was locked)
 *   result_node  filled with the details of the resolved node on success
 *   rfp          process on whose behalf the path is resolved
 *
 * Returns OK on success or an error code. On every error return
 * *(resolve->l_vmp) is NULL and no vmnt lock taken here is left held.
 */
static int lookup(struct vnode *start_node, struct lookup *resolve,
  node_details_t *result_node, struct fproc *rfp)
{
  int r, symloop;
  endpoint_t fs_e;
  size_t path_off, path_left_len;
  ino_t dir_ino, root_ino;
  uid_t uid;
  gid_t gid;
  struct vnode *dir_vp;
  struct vmnt *vmp, *vmpres;
  struct lookup_res res;
  tll_access_t mnt_lock_type;

  assert(resolve->l_vmp);
  assert(resolve->l_vnode);

  *(resolve->l_vmp) = vmpres = NULL;	/* No vmnt found nor locked yet */

  /* Empty (start) path? */
  if (resolve->l_path[0] == '\0') {
    result_node->inode_nr = 0;
    return(ENOENT);
  }

  if (!rfp->fp_rd || !rfp->fp_wd) {
    printf("VFS: lookup %d: no rd/wd\n", rfp->fp_endpoint);
    return(ENOENT);
  }

  fs_e = start_node->v_fs_e;
  dir_ino = start_node->v_inode_nr;
  vmpres = find_vmnt(fs_e);

  if (vmpres == NULL) return(EIO);	/* mountpoint vanished? */

  /* Is the process' root directory on the same partition?,
   * if so, set the chroot directory too. */
  if (rfp->fp_rd->v_dev == rfp->fp_wd->v_dev)
    root_ino = rfp->fp_rd->v_inode_nr;
  else
    root_ino = 0;

  /* Set user and group ids according to the system call */
  uid = (job_call_nr == ACCESS ? rfp->fp_realuid : rfp->fp_effuid);
  gid = (job_call_nr == ACCESS ? rfp->fp_realgid : rfp->fp_effgid);

  symloop = 0;	/* Number of symlinks seen so far */

  /* Lock vmnt. A VMNT_READ request is first taken as VMNT_WRITE and only
   * downgraded once parsing is done; this serializes vmnt locks during path
   * parsing so a reader cannot block another thread's vmnt lock upgrade.
   */
  if (resolve->l_vmnt_lock == VMNT_READ)
    mnt_lock_type = VMNT_WRITE;
  else
    mnt_lock_type = resolve->l_vmnt_lock;

  if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
    if (r == EBUSY)	/* vmnt already locked */
      vmpres = NULL;
    else
      return(r);
  }
  *(resolve->l_vmp) = vmpres;

  /* Issue the request */
  r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);

  if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
    if (vmpres) unlock_vmnt(vmpres);
    *(resolve->l_vmp) = NULL;
    return(r);	/* i.e., an error occurred */
  }

  /* While the response is related to mount control set the
   * new requests respectively */
  while (r == EENTERMOUNT || r == ELEAVEMOUNT || r == ESYMLINK) {
    /* Update the path buffer to reflect what's left to be parsed. */
    path_off = res.char_processed;
    path_left_len = strlen(&resolve->l_path[path_off]);
    memmove(resolve->l_path, &resolve->l_path[path_off], path_left_len);
    resolve->l_path[path_left_len] = '\0';	/* terminate string */

    /* Update the current value of the symloop counter */
    symloop += res.symloop;
    if (symloop > SYMLOOP_MAX) {
      if (vmpres) unlock_vmnt(vmpres);
      *(resolve->l_vmp) = NULL;
      return(ELOOP);
    }

    /* Symlink encountered with absolute path */
    if (r == ESYMLINK) {
      dir_vp = rfp->fp_rd;
      vmp = NULL;
    } else if (r == EENTERMOUNT) {
      /* Entering a new partition */
      dir_vp = NULL;
      /* Start node is now the mounted partition's root node */
      for (vmp = &vmnt[0]; vmp != &vmnt[NR_MNTS]; ++vmp) {
        if (vmp->m_dev != NO_DEV && vmp->m_mounted_on) {
          if (vmp->m_mounted_on->v_inode_nr == res.inode_nr &&
              vmp->m_mounted_on->v_fs_e == res.fs_e) {
            dir_vp = vmp->m_root_node;
            break;
          }
        }
      }
      if (dir_vp == NULL) {
        printf("VFS: path lookup error; root node not found\n");
        if (vmpres) unlock_vmnt(vmpres);
        *(resolve->l_vmp) = NULL;
        return(EIO);
      }
    } else {
      /* Climbing up mount */
      /* Find the vmnt that represents the partition on
       * which we "climb up". */
      if ((vmp = find_vmnt(res.fs_e)) == NULL) {
        panic("VFS lookup: can't find parent vmnt");
      }

      /* Make sure that the child FS does not feed a bogus path
       * to the parent FS. That is, when we climb up the tree, we
       * must've encountered ".." in the path, and that is exactly
       * what we're going to feed to the parent */
      if (strncmp(resolve->l_path, "..", 2) != 0 ||
          (resolve->l_path[2] != '\0' && resolve->l_path[2] != '/')) {
        printf("VFS: bogus path: %s\n", resolve->l_path);
        if (vmpres) unlock_vmnt(vmpres);
        *(resolve->l_vmp) = NULL;
        return(ENOENT);
      }

      /* Start node is the vnode on which the partition is
       * mounted */
      dir_vp = vmp->m_mounted_on;
    }

    /* Set the starting directories inode number and FS endpoint */
    fs_e = dir_vp->v_fs_e;
    dir_ino = dir_vp->v_inode_nr;

    /* Is the process' root directory on the same partition?,
     * if so, set the chroot directory too. */
    if (dir_vp->v_dev == rfp->fp_rd->v_dev)
      root_ino = rfp->fp_rd->v_inode_nr;
    else
      root_ino = 0;

    /* Unlock a previously locked vmnt if locked and lock new vmnt. The
     * caller-visible pointer is cleared first so that an error return
     * below never leaves it referring to the vmnt we just unlocked.
     */
    if (vmpres) unlock_vmnt(vmpres);
    *(resolve->l_vmp) = NULL;
    vmpres = find_vmnt(fs_e);
    if (vmpres == NULL) return(EIO);	/* mount point vanished? */
    if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
      if (r == EBUSY)
        vmpres = NULL;	/* Already locked */
      else
        return(r);
    }
    *(resolve->l_vmp) = vmpres;

    r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);

    if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
      if (vmpres) unlock_vmnt(vmpres);
      *(resolve->l_vmp) = NULL;
      return(r);
    }
  }

  if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) {
    /* downgrade VMNT_WRITE to VMNT_READ */
    downgrade_vmnt_lock(*(resolve->l_vmp));
  }

  /* Fill in response fields */
  result_node->inode_nr = res.inode_nr;
  result_node->fmode = res.fmode;
  result_node->fsize = res.fsize;
  result_node->dev = res.dev;
  result_node->fs_e = res.fs_e;
  result_node->uid = res.uid;
  result_node->gid = res.gid;

  return(r);
}
2012-02-13 16:28:04 +01:00
/*===========================================================================*
* lookup_init *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void lookup_init(resolve, path, flags, vmp, vp)
2012-02-13 16:28:04 +01:00
struct lookup *resolve;
char *path;
int flags;
struct vmnt **vmp;
struct vnode **vp;
{
assert(vmp != NULL);
assert(vp != NULL);
resolve->l_path = path;
resolve->l_flags = flags;
resolve->l_vmp = vmp;
resolve->l_vnode = vp;
resolve->l_vmnt_lock = TLL_NONE;
resolve->l_vnode_lock = TLL_NONE;
*vmp = NULL; /* Initialize lookup result to NULL */
*vp = NULL;
}
/*===========================================================================*
* get_name *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
int get_name(dirp, entry, ename)
struct vnode *dirp;
struct vnode *entry;
char ename[NAME_MAX + 1];
{
#define DIR_ENTRIES 8
#define DIR_ENTRY_SIZE (sizeof(struct dirent) + NAME_MAX)
off_t pos, new_pos;
int r, consumed, totalbytes, name_len;
char buf[DIR_ENTRY_SIZE * DIR_ENTRIES];
struct dirent *cur;
pos = 0;
2010-11-12 19:38:10 +01:00
2012-04-25 14:44:42 +02:00
if (!S_ISDIR(dirp->v_mode)) return(EBADF);
do {
2012-02-13 16:28:04 +01:00
r = req_getdents(dirp->v_fs_e, dirp->v_inode_nr, pos, buf, sizeof(buf),
&new_pos, 1);
if (r == 0) {
return(ENOENT); /* end of entries -- matching inode !found */
} else if (r < 0) {
return(r); /* error */
}
consumed = 0; /* bytes consumed */
totalbytes = r; /* number of bytes to consume */
do {
cur = (struct dirent *) (buf + consumed);
name_len = cur->d_reclen - offsetof(struct dirent, d_name) - 1;
2012-11-30 13:22:36 +01:00
if(cur->d_name + name_len+1 > &buf[sizeof(buf)])
return(EINVAL); /* Rubbish in dir entry */
if (entry->v_inode_nr == cur->d_ino) {
/* found the entry we were looking for */
int copylen = MIN(name_len + 1, NAME_MAX + 1);
if (strlcpy(ename, cur->d_name, copylen) >= copylen) {
return(ENAMETOOLONG);
}
ename[NAME_MAX] = '\0';
return(OK);
}
/* not a match -- move on to the next dirent */
consumed += cur->d_reclen;
} while (consumed < totalbytes);
pos = new_pos;
} while (1);
}
/*===========================================================================*
* canonical_path *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
int canonical_path(orig_path, rfp)
2012-02-13 16:28:04 +01:00
char orig_path[PATH_MAX];
struct fproc *rfp;
{
2012-02-13 16:28:04 +01:00
/* Find canonical path of a given path */
int len = 0;
int r, symloop = 0;
struct vnode *dir_vp, *parent_dir;
2012-02-13 16:28:04 +01:00
struct vmnt *dir_vmp, *parent_vmp;
char component[NAME_MAX+1]; /* NAME_MAX does /not/ include '\0' */
char temp_path[PATH_MAX];
struct lookup resolve;
parent_dir = dir_vp = NULL;
parent_vmp = dir_vmp = NULL;
2012-07-13 18:08:06 +02:00
strlcpy(temp_path, orig_path, PATH_MAX);
2012-02-13 16:28:04 +01:00
temp_path[PATH_MAX - 1] = '\0';
2012-02-13 16:28:04 +01:00
/* First resolve path to the last directory holding the file */
do {
2012-02-13 16:28:04 +01:00
if (dir_vp) {
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(dir_vp);
}
2012-02-13 16:28:04 +01:00
lookup_init(&resolve, temp_path, PATH_NOFLAGS, &dir_vmp, &dir_vp);
resolve.l_vmnt_lock = VMNT_READ;
resolve.l_vnode_lock = VNODE_READ;
if ((dir_vp = last_dir(&resolve, rfp)) == NULL) return(err_code);
/* dir_vp points to dir and resolve path now contains only the
* filename.
*/
2012-07-13 18:08:06 +02:00
strlcpy(orig_path, temp_path, NAME_MAX+1); /* Store file name */
/* If we're just crossing a mount point, our name has changed to '.' */
if (!strcmp(orig_path, ".")) orig_path[0] = '\0';
/* check if the file is a symlink, if so resolve it */
2012-02-13 16:28:04 +01:00
r = rdlink_direct(orig_path, temp_path, rfp);
if (r <= 0)
break;
/* encountered a symlink -- loop again */
2012-07-13 18:08:06 +02:00
strlcpy(orig_path, temp_path, PATH_MAX);
symloop++;
} while (symloop < SYMLOOP_MAX);
if (symloop >= SYMLOOP_MAX) {
2012-02-13 16:28:04 +01:00
if (dir_vp) {
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(dir_vp);
}
return(ELOOP);
}
2012-02-13 16:28:04 +01:00
/* We've got the filename and the actual directory holding the file. From
* here we start building up the canonical path by climbing up the tree */
while (dir_vp != rfp->fp_rd) {
2012-07-13 18:08:06 +02:00
strlcpy(temp_path, "..", NAME_MAX+1);
/* check if we're at the root node of the file system */
if (dir_vp->v_vmnt->m_root_node == dir_vp) {
if (dir_vp->v_vmnt->m_mounted_on == NULL) {
/* Bail out, we can't go any higher */
break;
}
2012-02-13 16:28:04 +01:00
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(dir_vp);
dir_vp = dir_vp->v_vmnt->m_mounted_on;
2012-02-13 16:28:04 +01:00
dir_vmp = dir_vp->v_vmnt;
if (lock_vmnt(dir_vmp, VMNT_READ) != OK)
panic("failed to lock vmnt");
if (lock_vnode(dir_vp, VNODE_READ) != OK)
panic("failed to lock vnode");
dup_vnode(dir_vp);
}
2012-02-13 16:28:04 +01:00
lookup_init(&resolve, temp_path, PATH_NOFLAGS, &parent_vmp,
&parent_dir);
resolve.l_vmnt_lock = VMNT_READ;
resolve.l_vnode_lock = VNODE_READ;
if ((parent_dir = advance(dir_vp, &resolve, rfp)) == NULL) {
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(dir_vp);
return(err_code);
}
/* now we have to retrieve the name of the parent directory */
if ((r = get_name(parent_dir, dir_vp, component)) != OK) {
2012-02-13 16:28:04 +01:00
unlock_vnode(parent_dir);
unlock_vmnt(parent_vmp);
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(parent_dir);
2012-02-13 16:28:04 +01:00
put_vnode(dir_vp);
return(r);
}
len += strlen(component) + 1;
if (len >= PATH_MAX) {
2012-02-13 16:28:04 +01:00
/* adding the component to orig_path would exceed PATH_MAX */
unlock_vnode(parent_dir);
unlock_vmnt(parent_vmp);
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(parent_dir);
2012-02-13 16:28:04 +01:00
put_vnode(dir_vp);
return(ENOMEM);
}
2012-02-13 16:28:04 +01:00
/* Store result of component in orig_path. First make space by moving
* the contents of orig_path to the right. Move strlen + 1 bytes to
* include the terminating '\0'. Move to strlen + 1 bytes to reserve
* space for the slash.
*/
2012-02-13 16:28:04 +01:00
memmove(orig_path+strlen(component)+1, orig_path, strlen(orig_path)+1);
/* Copy component into canon_path */
2012-02-13 16:28:04 +01:00
memmove(orig_path, component, strlen(component));
/* Put slash into place */
2012-02-13 16:28:04 +01:00
orig_path[strlen(component)] = '/';
/* Store parent_dir result, and continue the loop once more */
2012-02-13 16:28:04 +01:00
unlock_vnode(dir_vp);
unlock_vmnt(dir_vmp);
put_vnode(dir_vp);
dir_vp = parent_dir;
dir_vmp = parent_vmp;
parent_vmp = NULL;
}
unlock_vmnt(dir_vmp);
2012-02-13 16:28:04 +01:00
unlock_vnode(dir_vp);
put_vnode(dir_vp);
/* add the leading slash */
len = strlen(orig_path);
2012-02-13 16:28:04 +01:00
if (strlen(orig_path) >= PATH_MAX) return(ENAMETOOLONG);
memmove(orig_path+1, orig_path, len + 1 /* include terminating nul */);
2012-02-13 16:28:04 +01:00
orig_path[0] = '/';
/* remove trailing slash if there is any */
if (len > 1 && orig_path[len] == '/') orig_path[len] = '\0';
return(OK);
}
/*===========================================================================*
* check_perms *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
/* Canonicalize a path supplied by PFS and check that process 'ep' may read
 * and write the file it names.
 *
 *   ep       endpoint of the process whose permissions are checked
 *   io_gr    grant on PFS_PROC_NR used to fetch the path and to store the
 *            canonical path back
 *   pathlen  number of bytes to copy; must be at least UNIX_PATH_MAX and
 *            less than PATH_MAX
 *
 * Returns the result of forbidden() for R_BIT|W_BIT, EINVAL for a bad
 * endpoint or length, ENAMETOOLONG when the canonical path does not fit in
 * 'pathlen' bytes, or the error of a failing copy or path resolution.
 */
static int check_perms(endpoint_t ep, cp_grant_id_t io_gr, size_t pathlen)
{
  int r, slot;
  struct vnode *vp;
  struct vmnt *vmp;
  struct fproc *rfp;
  char canon_path[PATH_MAX];
  struct lookup resolve;

  if (isokendpt(ep, &slot) != OK) return(EINVAL);
  if (pathlen < UNIX_PATH_MAX || pathlen >= PATH_MAX) return(EINVAL);

  rfp = &(fproc[slot]);
  r = sys_safecopyfrom(PFS_PROC_NR, io_gr, (vir_bytes) 0,
                       (vir_bytes) canon_path, pathlen);
  if (r != OK) return(r);
  canon_path[pathlen] = '\0';	/* pathlen < PATH_MAX, so this is in bounds */

  /* Turn path into canonical path to the socket file */
  if ((r = canonical_path(canon_path, rfp)) != OK) return(r);
  if (strlen(canon_path) >= pathlen) return(ENAMETOOLONG);

  /* copy canon_path back to PFS */
  r = sys_safecopyto(PFS_PROC_NR, (cp_grant_id_t) io_gr, (vir_bytes) 0,
                     (vir_bytes) canon_path, pathlen);
  if (r != OK) return(r);

  /* Now do permissions checking */
  lookup_init(&resolve, canon_path, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code);

  /* check permissions */
  r = forbidden(rfp, vp, (R_BIT | W_BIT));

  unlock_vnode(vp);
  unlock_vmnt(vmp);

  put_vnode(vp);
  return(r);
}
/*===========================================================================*
* do_check_perms *
*===========================================================================*/
int do_check_perms(message *UNUSED(m_out))
{
/* Request handler for the permission check: only PFS_PROC_NR may ask for it,
 * every other caller gets EPERM. The arguments are taken from the job's
 * input message and passed on to check_perms().
 * This should be replaced by an ACL check.
 */
  size_t path_size;

  if (who_e == PFS_PROC_NR) {
    path_size = (size_t) job_m_in.VFS_PFS_COUNT;
    return check_perms(job_m_in.VFS_PFS_ENDPT, job_m_in.VFS_PFS_GRANT,
                       path_size);
  }

  return EPERM;
}