Merge AVFS and APFS

2011-08-17 13:23:45 +00:00 · 2011-08-17 13:23:45 +00:00 · a6bd3f4a22
commit a6bd3f4a22
parent ee48228aa0
78 changed files with 17905 additions and 9 deletions
--- a/servers/Makefile
+++ b/servers/Makefile
@ -3,14 +3,22 @@
 .include <bsd.own.mk>
 .if ${BUILDAVFS} == "yes"
 VFS= "avfs"
 PFS= "apfs"
 .else
 VFS= "vfs"
 PFS= "pfs"
 .endif
 .if ${MKIMAGEONLY} == "yes"
-SUBDIR=	ds init mfs pfs pm rs sched vfs vm
+SUBDIR=	ds init mfs ${PFS} pm rs sched ${VFS} vm
 .else
 SUBDIR=	ds ext2 hgfs inet init ipc is iso9660fs \
-	mfs pfs pm procfs rs sched vfs vm devman
+	mfs ${PFS} pm procfs rs sched ${VFS} vm devman
 .endif
--- a/servers/apfs/Makefile
+++ b/servers/apfs/Makefile
@ -0,0 +1,14 @@
 # Makefile for Pipe File System (PFS)
 # NOTE(review): this directory is servers/apfs, yet the program is still
 # built under the name "pfs" -- confirm that the shared name is intended.
 PROG=	pfs
 # Sources compiled into the server.
 SRCS=	open.c table.c inode.c main.c super.c link.c \
 	buffer.c read.c misc.c mount.c utility.c stadir.c \
 	uds.c dev_uds.c
 # Link against the driver and system libraries.
 DPADD+=	${LIBDRIVER} ${LIBSYS}
 LDADD+=	-ldriver -lsys
 # No manual page is installed for this program.
 MAN=
 BINDIR?= /usr/sbin
 .include <minix.bootprog.mk>
--- a/servers/apfs/buf.h
+++ b/servers/apfs/buf.h
@ -0,0 +1,26 @@
 #ifndef __PFS_BUF_H__
 #define __PFS_BUF_H__
 /* Buffer (block) cache.
 */
 /* One pipe buffer.  Buffers are allocated on demand by new_block() and
  * released with free() by put_block(), so every buffer on the list is
  * currently in use by at least one holder.
  */
 struct buf {
  /* Data portion of the buffer. */
  char b_data[PIPE_BUF];     /* pipe contents; zero-filled on allocation */
  /* Header portion of the buffer. */
  struct buf *b_next;           /* next buffer on the list (NULL at the rear) */
  struct buf *b_prev;           /* previous buffer (NULL at the front) */
  ino_t b_num;			/* inode number this buffer belongs to */
  dev_t b_dev;                  /* device of the owning inode */
  int b_bytes;                  /* starts at 0; presumably bytes in use -- confirm in read.c */
  int b_count;			/* Number of users of this buffer */
 };
 /* Head and tail of the doubly linked list of allocated buffers. */
 EXTERN struct buf *front;	/* oldest buffer on the list, NULL if empty */
 EXTERN struct buf *rear;	/* most recently added buffer, NULL if empty */
 #endif
--- a/servers/apfs/buffer.c
+++ b/servers/apfs/buffer.c
@ -0,0 +1,103 @@
 #include "fs.h"
 #include "buf.h"
 #include "inode.h"
 #include <sys/types.h>
 #include <stdlib.h>
 #include <string.h>
 FORWARD _PROTOTYPE( struct buf *new_block, (dev_t dev, ino_t inum)			);
 /*===========================================================================*
 *                              buf_pool                                     *
 *===========================================================================*/
 PUBLIC void buf_pool(void)
 {
 /* Start with an empty buffer list: neither a head nor a tail. */
  front = rear = NULL;
 }
 /*===========================================================================*
 *				get_block				     *
 *===========================================================================*/
 PUBLIC struct buf *get_block(dev_t dev, ino_t inum)
 {
 /* Return the buffer belonging to inode 'inum' on 'dev' with its use count
  * raised by one.  When no such buffer is on the list yet, a new one is
  * created through new_block(), which may return NULL.
  */
  struct buf *cur;
  for (cur = front; cur != NULL; cur = cur->b_next) {
 	if (cur->b_dev != dev || cur->b_num != inum) continue;
 	cur->b_count++;
 	return(cur);
  }
  /* Nothing on the list matches; hand the request to the allocator. */
  return new_block(dev, inum);
 }
 /*===========================================================================*
 *				new_block				     *
 *===========================================================================*/
 PRIVATE struct buf *new_block(dev_t dev, ino_t inum)
 {
 /* Create a zero-filled buffer for (dev, inum) with one user and append it
  * to the rear of the buffer list.  Returns NULL and sets err_code to
  * ENOSPC when no memory is available.
  */
  struct buf *nb;
  nb = malloc(sizeof(struct buf));
  if (nb == NULL) {
 	err_code = ENOSPC;
 	return(NULL);
  }
  memset(nb->b_data, 0 , PIPE_BUF);
  nb->b_count = 1;
  nb->b_bytes = 0;
  nb->b_dev = dev;
  nb->b_num = inum;
  /* Link the new buffer in as the last element. */
  nb->b_next = NULL;
  if (front != NULL) {
 	nb->b_prev = rear;
 	rear->b_next = nb;
  } else {
 	/* First buffer: it is the front as well as the rear. */
 	nb->b_prev = NULL;
 	front = nb;
  }
  rear = nb;
  return(nb);
 }
 /*===========================================================================*
 *				put_block				     *
 *===========================================================================*/
 PUBLIC void put_block(dev_t dev, ino_t inum)
 {
 /* Drop one reference to the buffer of inode 'inum' on 'dev'.  When the last
  * reference goes away the buffer is unlinked from the list and freed.
  * Releasing a buffer that does not exist is a no-op.
  */
  struct buf *bp;
  /* Search the list directly.  get_block() must not be used for the lookup:
   * on a miss it would allocate and link a brand-new buffer (and possibly
   * set err_code) merely so that it can be thrown away again below.
   */
  for (bp = front; bp != NULL; bp = bp->b_next) {
 	if (bp->b_dev == dev && bp->b_num == inum) break;
  }
  if (bp == NULL) return; /* We didn't find the block. Nothing to put. */
  if (--bp->b_count > 0) return;	/* still in use by someone else */
  /* Cut bp out of the list, fixing up front/rear at the ends. */
  if (bp->b_prev == NULL)
 	front = bp->b_next;
  else
 	bp->b_prev->b_next = bp->b_next;
  if (bp->b_next == NULL)
 	rear = bp->b_prev;
  else
 	bp->b_next->b_prev = bp->b_prev;
  /* Buffer administration is done. Now it's safe to free up bp. */
  free(bp);
 }
--- a/servers/apfs/const.h
+++ b/servers/apfs/const.h
@ -0,0 +1,42 @@
 #ifndef __PFS_CONST_H__
 #define __PFS_CONST_H__
 #define NR_INODES        256 	/* # slots in "in core" inode table */
 /* Size of descriptor table for unix domain sockets. This should be
 * equal to the maximum number of minor devices (currently 256).
 */
 #define NR_FDS           256
 #define INODE_HASH_LOG2   7     /* 2 based logarithm of the inode hash size */
 /* Number of hash buckets, and the mask that maps an inode number to one. */
 #define INODE_HASH_SIZE   ((unsigned long)1<<INODE_HASH_LOG2)
 #define INODE_HASH_MASK   (((unsigned long)1<<INODE_HASH_LOG2)-1)
 /* The type of sizeof may be (unsigned) long.  Use the following macro for
 * taking the sizes of small objects so that there are no surprises like
 * (small) long constants being passed to routines expecting an int.
 */
 #define usizeof(t) ((unsigned) sizeof(t))
 /* Miscellaneous constants */
 #define INVAL_UID ((uid_t) -1)	/* Invalid user ID */
 #define INVAL_GID ((gid_t) -1)	/* Invalid group ID */
 /* NOTE(review): get_block() in buffer.c takes only (dev, inum) and no mode
  * argument, so the three values below look unused in this server -- confirm.
  */
 #define NORMAL	           0	/* forces get_block to do disk read */
 #define NO_READ            1	/* prevents get_block from doing disk read */
 #define PREFETCH           2	/* tells get_block not to read or mark dev */
 #define NO_BIT   ((bit_t) 0)	/* returned by alloc_bit() to signal failure */
 /* Bits kept in the i_update field of an inode. */
 #define ATIME            002	/* set if atime field needs updating */
 #define CTIME            004	/* set if ctime field needs updating */
 #define MTIME            010	/* set if mtime field needs updating */
 #define FS_BITMAP_CHUNKS(b) ((b)/usizeof (bitchunk_t))/* # map chunks/blk   */
 #define FS_BITCHUNK_BITS		(usizeof(bitchunk_t) * CHAR_BIT)
 #define FS_BITS_PER_BLOCK(b)	(FS_BITMAP_CHUNKS(b) * FS_BITCHUNK_BITS)
 /* Upper bounds used by main() when indexing fs_call_vec and dev_call_vec. */
 #define FS_CALL_VEC_SIZE 31
 #define DEV_CALL_VEC_SIZE 25
 #endif
--- a/servers/apfs/dev_uds.c
+++ b/servers/apfs/dev_uds.c
--- a/servers/apfs/fs.h
+++ b/servers/apfs/fs.h
@ -0,0 +1,31 @@
 #ifndef __PFS_FS_H__
 #define __PFS_FS_H__
 /* This is the master header for pfs.  It includes some other files
 * and defines the principal constants.
 */
 #define _POSIX_SOURCE      1	/* tell headers to include POSIX stuff */
 #define _MINIX             1	/* tell headers to include MINIX stuff */
 #define _SYSTEM            1	/* tell headers that this is the kernel */
 /* The following are so basic, all the *.c files get them automatically. */
 #include <minix/config.h>	/* MUST be first */
 #include <minix/ansi.h>		/* MUST be second */
 #include <sys/types.h>
 #include <minix/const.h>
 #include <minix/type.h>
 #include <minix/dmap.h>
 #include <minix/vfsif.h>
 #include <limits.h>
 #include <errno.h>
 #include <minix/syslib.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <minix/sysutil.h>
 #include "const.h"
 #include "proto.h"
 #include "glo.h"
 #endif
--- a/servers/apfs/glo.h
+++ b/servers/apfs/glo.h
@ -0,0 +1,29 @@
 #ifndef __PFS_GLO_H__
 #define __PFS_GLO_H__
 /* EXTERN should be extern except for the table file */
 #ifdef _TABLE
 #undef EXTERN
 #define EXTERN
 #endif
 #include <minix/vfsif.h>
 /* Global state of the server.  err_code carries the reason for a failure
  * out of helpers that can only return NULL (e.g. get_inode, new_block).
  */
 EXTERN int err_code;		/* temporary storage for error number */
 EXTERN _PROTOTYPE (int (*fs_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* fs call table */
 EXTERN _PROTOTYPE (int (*dev_call_vec[]), (message *fs_m_in, message *fs_m_out) ); /* dev call table */
 EXTERN uid_t caller_uid;	/* reset to INVAL_UID per request; set by fs_newnode */
 EXTERN gid_t caller_gid;	/* reset to INVAL_GID per request; set by fs_newnode */
 EXTERN int req_nr;		/* type of the request currently being handled */
 EXTERN int SELF_E;		/* result of getprocnr(), stored at init */
 EXTERN int exitsignaled;	/* set to 1 once SIGTERM has been received */
 EXTERN int busy;		/* non-zero makes fs_unmount return EBUSY */
 EXTERN int unmountdone;	/* set to TRUE by a successful fs_unmount */
 /* Inode map. */
 /* NOTE(review): presumably maintained by alloc_bit()/free_bit() in super.c,
  * which is not shown here -- confirm.
  */
 EXTERN bitchunk_t inodemap[FS_BITMAP_CHUNKS(NR_INODES)];
 #endif
--- a/servers/apfs/inc.h
+++ b/servers/apfs/inc.h
@ -0,0 +1,41 @@
 #define _SYSTEM            1    /* get OK and negative error codes */
 #define _MINIX             1	/* tell headers to include MINIX stuff */
 #define VERBOSE		   0	/* display diagnostics */
 #ifdef __NBSD_LIBC
 #include <sys/ioc_net.h>
 #else
 #include <net/ioctl.h>
 #endif
 #include <minix/ansi.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/select.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <limits.h>
 #include <errno.h>
 #include <signal.h>
 #include <unistd.h>
 #include <minix/callnr.h>
 #include <minix/config.h>
 #include <minix/dmap.h>
 #include <minix/type.h>
 #include <minix/const.h>
 #include <minix/com.h>
 #include <minix/syslib.h>
 #include <minix/sysutil.h>
 #include <minix/keymap.h>
 #include <minix/bitmap.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 #include <signal.h>
 #include "proto.h"
--- a/servers/apfs/inode.c
+++ b/servers/apfs/inode.c
@ -0,0 +1,334 @@
 /* This file manages the inode table.  There are procedures to allocate and
 * deallocate inodes, acquire, erase, and release them, and read and write
 * them from the disk.
 *
 * The entry points into this file are
 *   get_inode:	   search inode table for a given inode; if not there,
 *                 read it
 *   put_inode:	   indicate that an inode is no longer needed in memory
 *   alloc_inode:  allocate a new, unused inode
 *   wipe_inode:   erase some fields of a newly allocated inode
 *   free_inode:   mark an inode as available for a new file
 *   update_times: update atime, ctime, and mtime
 *   find_inode:   retrieve pointer to inode in inode cache
 *
 */
 #include "fs.h"
 #include "buf.h"
 #include "inode.h"
 #include <minix/vfsif.h>
 FORWARD _PROTOTYPE( void addhash_inode, (struct inode * const node)		);
 FORWARD _PROTOTYPE( void unhash_inode, (struct inode * const node) 		);
 /*===========================================================================*
 *				fs_putnode				     *
 *===========================================================================*/
 PUBLIC int fs_putnode(message *fs_m_in, message *fs_m_out)
 {
 /* Release REQ_COUNT references to the inode named in the request.  An
  * unknown inode or an impossible count is treated as fatal.  When the last
  * reference disappears the pipe buffer of the inode is released as well.
  */
  struct inode *node;
  dev_t saved_dev;
  ino_t saved_num;
  int nrefs;
  node = find_inode( (ino_t) fs_m_in->REQ_INODE_NR);
  if (node == NULL) {
 	  printf("%s:%d put_inode: inode #%ld dev: %d not found\n", __FILE__,
 		 __LINE__, fs_m_in->REQ_INODE_NR, (dev_t) fs_m_in->REQ_DEV);
 	  panic("fs_putnode failed");
  }
  nrefs = fs_m_in->REQ_COUNT;
  if (nrefs <= 0) {
 	printf("%s:%d put_inode: bad value for count: %d\n", __FILE__,
 	       __LINE__, nrefs);
 	panic("fs_putnode failed");
  }
  if (nrefs > node->i_count) {
 	printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__,
 	       __LINE__, nrefs, node->i_count);
 	panic("fs_putnode failed");
  }
  /* Give up all but one reference here; put_inode() consumes the last. */
  node->i_count -= nrefs - 1;
  /* Remember the identity first: put_inode() may reset i_dev and i_num. */
  saved_num = node->i_num;
  saved_dev = node->i_dev;
  put_inode(node);
  if (node->i_count == 0) put_block(saved_dev, saved_num);
  return(OK);
 }
 /*===========================================================================*
 *				init_inode_cache			     *
 *===========================================================================*/
 PUBLIC void init_inode_cache()
 {
 /* Set up the inode bookkeeping: an empty hash table, every slot of the
  * inode table on the unused list, and inode number 0 reserved.
  */
  unsigned long bucket;
  int slot;
  /* init free/unused list */
  TAILQ_INIT(&unused_inodes);
  /* init hash lists */
  for (bucket = 0; bucket < INODE_HASH_SIZE; bucket++) {
 	LIST_INIT(&hash_inodes[bucket]);
  }
  /* add free inodes to unused/free list */
  for (slot = 0; slot < NR_INODES; slot++) {
 	inode[slot].i_num = NO_ENTRY;
 	TAILQ_INSERT_HEAD(&unused_inodes, &inode[slot], i_unused);
  }
  /* Take bit 0 out of circulation so that it is never handed out as an
   * inode number; the first allocation is expected to yield NO_BIT.
   */
  if (alloc_bit() != NO_BIT) printf("PFS could not reserve NO_BIT\n");
  busy = 0; /* This bit does not make the server 'in use/busy'. */
 }
 /*===========================================================================*
 *				addhash_inode   			     *
 *===========================================================================*/
 PRIVATE void addhash_inode(struct inode * const node)
 {
  int hashi = (int) (node->i_num & INODE_HASH_MASK);
  /* insert into hash table */
  LIST_INSERT_HEAD(&hash_inodes[hashi], node, i_hash);
 }
 /*===========================================================================*
 *				unhash_inode      			     *
 *===========================================================================*/
 PRIVATE void unhash_inode(struct inode * const node)
 {
 /* Take 'node' off the hash chain it is currently linked into.  The node
  * must be on a chain (see addhash_inode) when this is called.
  */
  /* remove from hash table */
  LIST_REMOVE(node, i_hash);
 }
 /*===========================================================================*
 *				get_inode				     *
 *===========================================================================*/
 PUBLIC struct inode *get_inode(
  dev_t dev,		/* device on which inode resides */
  ino_t numb		/* inode number */
 )
 {
 /* Find the inode in the hash table. If it is not there, get a free inode
 * load it from the disk if it's necessary and put on the hash list
 */
  register struct inode *rip;
  int hashi;
  hashi = (int) (numb & INODE_HASH_MASK);
  /* Search inode in the hash table */
  LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) {
 	if (rip->i_num == numb && rip->i_dev == dev) {
 		/* If unused, remove it from the unused/free list */
 		if (rip->i_count == 0) {
 			TAILQ_REMOVE(&unused_inodes, rip, i_unused);
 		}
 		++rip->i_count;
 		return(rip);
 	}
  }
  /* Inode is not on the hash, get a free one */
  if (TAILQ_EMPTY(&unused_inodes)) {
      err_code = ENFILE;
      return(NULL);
  }
  rip = TAILQ_FIRST(&unused_inodes);
  /* If not free unhash it */
  if (rip->i_num != NO_ENTRY) unhash_inode(rip);
  /* Inode is not unused any more */
  TAILQ_REMOVE(&unused_inodes, rip, i_unused);
  /* Load the inode. */
  rip->i_dev = dev;
  rip->i_num = numb;
  rip->i_count = 1;
  rip->i_update = 0;		/* all the times are initially up-to-date */
  /* Add to hash */
  addhash_inode(rip);
  return(rip);
 }
 /*===========================================================================*
 *				find_inode        			     *
 *===========================================================================*/
 PUBLIC struct inode *find_inode(numb)
 ino_t numb;		/* inode number */
 {
 /* Find the inode specified by the inode and device number.
 */
  struct inode *rip;
  int hashi;
  hashi = (int) (numb & INODE_HASH_MASK);
  /* Search inode in the hash table */
  LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) {
      if (rip->i_count > 0 && rip->i_num == numb) {
          return(rip);
      }
  }
  return(NULL);
 }
 /*===========================================================================*
 *				put_inode				     *
 *===========================================================================*/
 PUBLIC void put_inode(rip)
 struct inode *rip;	/* pointer to inode to be released */
 {
 /* Give up one reference to 'rip'.  Once the last reference is gone the
  * inode is truncated to zero length.  An inode without links is then freed
  * and its slot goes to the front of the unused list; an inode that still
  * has links stays hashed and is queued at the back of that list.
  */
  if (rip == NULL) return;	/* checking here is easier than in caller */
  if (rip->i_count < 1)
 	panic("put_inode: i_count already below 1: %d", rip->i_count);
  if (--rip->i_count != 0) return;	/* somebody else still uses it */
  if (rip->i_nlinks == NO_LINK) {
 	/* No links: release the inode number and forget the identity. */
 	truncate_inode(rip, 0);
 	rip->i_mode = I_NOT_ALLOC;	/* clear I_TYPE field */
 	free_inode(rip);
 	unhash_inode(rip);
 	rip->i_num = NO_ENTRY;
 	rip->i_dev = NO_DEV;
 	rip->i_rdev = NO_DEV;
 	TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
  } else {
 	/* Still linked: keep it cached at the back of the LRU list. */
 	truncate_inode(rip, (off_t) 0);
 	TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused);
  }
 }
 /*===========================================================================*
 *				alloc_inode				     *
 *===========================================================================*/
 PUBLIC struct inode *alloc_inode(dev_t dev, mode_t bits)
 {
 /* Allocate a fresh inode number and an in-core slot for it on 'dev'.
  * On success the inode is returned with mode 'bits', no links, and the
  * uid/gid of the caller.  On failure NULL is returned and err_code holds
  * the reason: ENOSPC when no inode number is left, or whatever get_inode()
  * reported when no table slot is available.
  */
  register struct inode *rip;
  bit_t b;
  ino_t i_num;
  b = alloc_bit();
  if (b == NO_BIT) {
 	err_code = ENOSPC;
 	printf("PipeFS is out of inodes\n");
 	return(NULL);
  }
  i_num = (ino_t) b;
  /* Try to acquire a slot in the inode table. */
  if ((rip = get_inode(dev, i_num)) == NULL) {
 	/* No inode table slots available.  The bit was reserved a few lines
 	 * up by this very call, so it must be given back whatever 'dev' is;
 	 * otherwise the inode number would be lost for good.
 	 */
 	free_bit(b);
 	return(NULL);
  }
  /* An inode slot is available. */
  rip->i_mode = bits;		/* set up RWX bits */
  rip->i_nlinks = NO_LINK;	/* initial no links */
  rip->i_uid = caller_uid;	/* file's uid is owner's */
  rip->i_gid = caller_gid;	/* ditto group id */
  /* The remaining fields (size, time update flags) are reset by
   * wipe_inode(), which truncate_inode() shares.
   */
  wipe_inode(rip);
  return(rip);
 }
 /*===========================================================================*
 *				wipe_inode				     *
 *===========================================================================*/
 PUBLIC void wipe_inode(rip)
 struct inode *rip;	/* the inode to be erased */
 {
 /* Reset the fields shared by allocation and truncation: the inode becomes
  * empty and all three time stamps are flagged for a later refresh.
  * Called from alloc_inode() and truncate_inode().
  */
  rip->i_update = ATIME | CTIME | MTIME;	/* update all times later */
  rip->i_size = 0;
 }
 /*===========================================================================*
 *				free_inode				     *
 *===========================================================================*/
 PUBLIC void free_inode(rip)
 struct inode *rip;
 {
 /* Return an inode to the pool of unallocated inodes. */
  bit_t b;
  if (rip->i_num <= (ino_t) 0 || rip->i_num >= (ino_t) NR_INODES) return;
  b = (bit_t) rip->i_num;
  free_bit(b);
 }
 /*===========================================================================*
 *				update_times				     *
 *===========================================================================*/
 PUBLIC void update_times(rip)
 struct inode *rip;	/* pointer to inode to be read/written */
 {
 /* Time stamps are not refreshed when an operation happens; the operation
  * merely sets ATIME, CTIME and/or MTIME in i_update.  This routine reads
  * the clock once, stores that time in every flagged field and clears the
  * flags.
  */
  time_t now = clock_time();
  if (rip->i_update & ATIME) {
 	rip->i_atime = now;
  }
  if (rip->i_update & CTIME) {
 	rip->i_ctime = now;
  }
  if (rip->i_update & MTIME) {
 	rip->i_mtime = now;
  }
  rip->i_update = 0;		/* they are all up-to-date now */
 }
--- a/servers/apfs/inode.h
+++ b/servers/apfs/inode.h
@ -0,0 +1,39 @@
 #ifndef __PFS_INODE_H__
 #define __PFS_INODE_H__
 /* Inode table.  This table holds inodes that are currently in use.
 */
 #include <sys/queue.h>
 /* In-core inode.  Every slot of the table is either in use (i_count > 0)
  * or linked on 'unused_inodes'; a slot whose i_num is not NO_ENTRY is also
  * linked on one chain of 'hash_inodes'.
  */
 EXTERN struct inode {
  mode_t i_mode;		/* file type, protection, etc. */
  nlink_t i_nlinks;		/* how many links to this file */
  uid_t i_uid;			/* user id of the file's owner */
  gid_t i_gid;			/* group number */
  off_t i_size;			/* current file size in bytes */
  time_t i_atime;		/* time of last access (V2 only) */
  time_t i_mtime;		/* when was file data last changed */
  time_t i_ctime;		/* when was inode itself changed (V2 only)*/
  /* The following items are not present on the disk. */
  dev_t i_dev;			/* which device is the inode on */
  dev_t i_rdev;			/* which special device is the inode on */
  ino_t i_num;			/* inode number; NO_ENTRY when the slot is empty */
  int i_count;			/* # references held; 0 means nobody uses it */
  char i_update;		/* the ATIME, CTIME, and MTIME bits are here */
  LIST_ENTRY(inode) i_hash;     /* hash list */
  TAILQ_ENTRY(inode) i_unused;  /* free and unused list */
 } inode[NR_INODES];
 /* list of unused/free inodes: empty slots are inserted at the head, cached
  * inodes at the tail, and get_inode() recycles from the head.
  */
 EXTERN TAILQ_HEAD(unused_inodes_t, inode)  unused_inodes;
 /* inode hashtable, indexed by (i_num & INODE_HASH_MASK) */
 EXTERN LIST_HEAD(inodelist, inode)         hash_inodes[INODE_HASH_SIZE];
 #endif
--- a/servers/apfs/link.c
+++ b/servers/apfs/link.c
@ -0,0 +1,50 @@
 #include "fs.h"
 #include "buf.h"
 #include "inode.h"
 #include <minix/vfsif.h>
 /*===========================================================================*
 *				fs_ftrunc				     *
 *===========================================================================*/
 PUBLIC int fs_ftrunc(message *fs_m_in, message *fs_m_out)
 {
 /* Handle a truncate request for the inode named in the message.  Only the
  * start offset of the request is looked at: truncate_inode() accepts
  * nothing but a new size of 0, so the end offset never matters.
  * Returns EINVAL for an unknown inode or a non-zero start offset.
  */
  struct inode *rip;
  off_t start;
  ino_t inumb;
  inumb = (ino_t) fs_m_in->REQ_INODE_NR;
  if( (rip = find_inode(inumb)) == NULL) return(EINVAL);
  start = fs_m_in->REQ_TRC_START_LO;
  return truncate_inode(rip, start);
 }
 /*===========================================================================*
 *				truncate_inode				     *
 *===========================================================================*/
 PUBLIC int truncate_inode(rip, newsize)
 register struct inode *rip;	/* pointer to inode to be truncated */
 off_t newsize;			/* inode must become this size */
 {
 /* Empty the inode.  The only size that can be requested is 0; any other
  * value is rejected with EINVAL and leaves the inode as it was.  On
  * success the size is cleared and the time stamps are marked for update
  * by wipe_inode().
  */
  if (newsize == 0) {
 	rip->i_size = newsize;
 	wipe_inode(rip);
 	return(OK);
  }
  return(EINVAL);	/* Only truncate pipes to 0. */
 }
--- a/servers/apfs/main.c
+++ b/servers/apfs/main.c
@ -0,0 +1,187 @@
 #include "fs.h"
 #include <assert.h>
 #include <signal.h>
 #include <minix/dmap.h>
 #include <minix/driver.h>
 #include <minix/endpoint.h>
 #include <minix/vfsif.h>
 #include "buf.h"
 #include "inode.h"
 #include "uds.h"
 FORWARD _PROTOTYPE(void get_work, (message *m_in)			);
 /* SEF functions and variables. */
 FORWARD _PROTOTYPE( void sef_local_startup, (void) );
 FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
 FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
 /*===========================================================================*
 *				main                                         *
 *===========================================================================*/
 PUBLIC int main(int argc, char *argv[])
 {
 /* This is the main routine of this service. The main loop consists of
 * three major activities: getting new work, processing the work, and
 * sending the reply. The loop runs until the server has both been
 * unmounted (unmountdone) and been asked to exit (exitsignaled).
 */
  int ind, do_reply, transid;
  message pfs_m_in;
  message pfs_m_out;
  /* SEF local startup. */
  env_setargs(argc, argv);
  sef_local_startup();
  printf("Started APFS\n");
  while(!unmountdone || !exitsignaled) {
 	endpoint_t src;
 	do_reply = 1;
 	/* Wait for request message. */
 	get_work(&pfs_m_in);
 	/* Split the message type into a transaction id and the request
 	 * number proper.  If nothing is left after removing the id, the
 	 * extracted value was the request number itself and no
 	 * transaction id was attached.
 	 */
 	transid = TRNS_GET_ID(pfs_m_in.m_type);
 	pfs_m_in.m_type = TRNS_DEL_ID(pfs_m_in.m_type);
 	if (pfs_m_in.m_type == 0) {
 		assert(!IS_VFS_FS_TRANSID(transid));
 		pfs_m_in.m_type = transid;
 		transid = 0;
 	} else
 		assert(IS_VFS_FS_TRANSID(transid) || transid == 0);
 	src = pfs_m_in.m_source;
 	caller_uid = INVAL_UID;	/* To trap errors */
 	caller_gid = INVAL_GID;
 	req_nr = pfs_m_in.m_type;
 	if (IS_DEV_RQ(req_nr)) {
 		/* Device request: dispatch through dev_call_vec.  The
 		 * handler's return value is not copied into the reply type
 		 * here; NOTE(review): presumably the handlers fill in
 		 * pfs_m_out themselves -- confirm in dev_uds.c.
 		 */
 		ind = req_nr - DEV_RQ_BASE;
 		if (ind < 0 || ind >= DEV_CALL_VEC_SIZE) {
 			printf("pfs: bad DEV request %d\n", req_nr);
 			pfs_m_out.m_type = EINVAL;
 		} else {
 			int result;
 			result = (*dev_call_vec[ind])(&pfs_m_in, &pfs_m_out);
 			if (pfs_m_out.REP_STATUS == SUSPEND ||
 			    result == SUSPEND) {
 				/* Nothing to tell, so not replying */
 				do_reply = 0;
 			}
 		}
 	} else if (IS_VFS_RQ(req_nr)) {
 		/* File system request: the handler's return value becomes
 		 * the type of the reply message.
 		 */
 		ind = req_nr - VFS_BASE;
 		if (ind < 0 || ind >= FS_CALL_VEC_SIZE) {
 			printf("pfs: bad FS request %d\n", req_nr);
 			pfs_m_out.m_type = EINVAL;
 		} else {
 			pfs_m_out.m_type =
 				(*fs_call_vec[ind])(&pfs_m_in, &pfs_m_out);
 		}
 	} else {
 		printf("pfs: bad request %d\n", req_nr);
 		pfs_m_out.m_type = EINVAL;
 	}
 	if (do_reply) {
 		/* Put the transaction id back on replies to FS requests
 		 * that arrived with one.
 		 */
 		if (IS_VFS_RQ(req_nr) && IS_VFS_FS_TRANSID(transid)) {
 			pfs_m_out.m_type = TRNS_ADD_ID(pfs_m_out.m_type,
 							transid);
 		}
 		reply(src, &pfs_m_out);
 	}
  }
  return(OK);
 }
 /*===========================================================================*
 *			       sef_local_startup			     *
 *===========================================================================*/
 PRIVATE void sef_local_startup()
 {
 /* Register this server's SEF callbacks and then run the SEF startup.
  * A fresh start is handled by sef_cb_init_fresh; a restart is routed to
  * sef_cb_init_fail.
  */
  /* Register init callbacks. */
  sef_setcb_init_fresh(sef_cb_init_fresh);
  sef_setcb_init_restart(sef_cb_init_fail);
  /* No live update support for now. */
  /* Register signal callbacks. */
  sef_setcb_signal_handler(sef_cb_signal_handler);
  /* Let SEF perform startup. */
  sef_startup();
 }
 /*===========================================================================*
 *		            sef_cb_init_fresh                                *
 *===========================================================================*/
 PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
 {
 /* Fresh-start initialization of the pipe file server: reset the main loop
  * flags, clear the inode table, set up the inode cache, the unix domain
  * socket layer and the buffer list, and announce the driver.
  * Always returns OK.
  */
  struct inode *slot;
  /* Initialize main loop parameters. */
  exitsignaled = 0;	/* No exit request seen yet. */
  busy = 0;		/* Server is not 'busy' (i.e., inodes in use). */
  /* No slot of the inode table is referenced yet. */
  for (slot = &inode[0]; slot < &inode[NR_INODES]; slot++) {
 	slot->i_count = 0;
  }
  init_inode_cache();
  uds_init();
  SELF_E = getprocnr();
  buf_pool();
  driver_announce();
  return(OK);
 }
 /*===========================================================================*
 *		           sef_cb_signal_handler                             *
 *===========================================================================*/
 PRIVATE void sef_cb_signal_handler(int signo)
 {
 /* React to SIGTERM by flagging that an exit was requested; every other
  * signal is ignored.
  */
  if (signo == SIGTERM) {
 	exitsignaled = 1;
  }
 }
 /*===========================================================================*
 *				get_work				     *
 *===========================================================================*/
 PRIVATE void get_work(m_in)
 message *m_in;				/* pointer to message */
 {
 /* Block until a message from VFS arrives and leave it in 'm_in'.  Messages
  * from any other sender are reported and dropped.  A failing receive is
  * fatal.
  */
  int r, status;
  endpoint_t sender;
  for (;;) {
 	/* wait for a message */
 	r = sef_receive_status(ANY, m_in, &status);
 	if (r != OK)
 		panic("sef_receive_status failed: %d", r);
 	sender = m_in->m_source;
 	if (sender == VFS_PROC_NR) break;	/* Normal FS request. */
 	printf("PFS: unexpected source %d\n", sender);
  }
 }
 /*===========================================================================*
 *				reply					     *
 *===========================================================================*/
 PUBLIC void reply(who, m_out)
 endpoint_t who;
 message *m_out;                       	/* report result */
 {
 /* Send 'm_out' to endpoint 'who'.  A failed send is only logged. */
  int r;
  r = send(who, m_out);
  if (r != OK) {
 	printf("PFS(%d) was unable to send reply\n", SELF_E);
  }
 }
--- a/servers/apfs/misc.c
+++ b/servers/apfs/misc.c
@ -0,0 +1,12 @@
 #include "fs.h"
 /*===========================================================================*
 *				fs_sync					     *
 *===========================================================================*/
 PUBLIC int fs_sync(message *fs_m_in, message *fs_m_out)
 {
 /* Perform the sync() system call.  No-op on this FS: neither message is
  * examined and OK is returned unconditionally.
  */
  return(OK);		/* sync() can't fail */
 }
--- a/servers/apfs/mount.c
+++ b/servers/apfs/mount.c
@ -0,0 +1,18 @@
 #include "fs.h"
 #include "glo.h"
 /*===========================================================================*
 *				fs_unmount				     *
 *===========================================================================*/
 PUBLIC int fs_unmount(message *fs_m_in, message *fs_m_out)
 {
 /* Unmount the Pipe File Server.  Refused with EBUSY while the server is
  * marked busy; otherwise the unmount is recorded in 'unmountdone', which
  * the main loop tests, and OK is returned.
  */
  if (!busy) {
 	unmountdone = TRUE;
 	return(OK);
  }
  return(EBUSY);	/* can't umount a busy file system */
 }
--- a/servers/apfs/open.c
+++ b/servers/apfs/open.c
@ -0,0 +1,52 @@
 #include "fs.h"
 #include <sys/stat.h>
 #include "buf.h"
 #include "inode.h"
 #include <minix/vfsif.h>
 /*===========================================================================*
 *				fs_newnode				     *
 *===========================================================================*/
 PUBLIC int fs_newnode(message *fs_m_in, message *fs_m_out)
 {
 /* Create a new inode as described by the request (uid, gid, mode, device)
  * and report it in the reply.  Block and character specials record the
  * device number; a FIFO additionally gets its pipe buffer.  Any other file
  * type is refused with EIO.  When no inode can be allocated the error left
  * in err_code is returned.
  */
  register int r = OK;
  mode_t bits;
  struct inode *rip;
  dev_t dev;
  caller_uid = (uid_t) fs_m_in->REQ_UID;
  caller_gid = (gid_t) fs_m_in->REQ_GID;
  bits = (mode_t) fs_m_in->REQ_MODE;
  dev = (dev_t) fs_m_in->REQ_DEV;
  /* Try to allocate the inode */
  if( (rip = alloc_inode(dev, bits) ) == NULL) return(err_code);
  switch (bits & S_IFMT) {
 	case S_IFBLK:
 	case S_IFCHR:
 		rip->i_rdev = dev;		/* Major/minor dev numbers */
 		break;
 	case S_IFIFO:
 		if ((get_block(dev, rip->i_num)) == NULL)
 			r = EIO;
 		break;
 	default:
 		r = EIO; /* Unsupported file type */
  }
  if (r != OK) {
 	/* Drop the reference obtained from alloc_inode().  The inode has no
 	 * links, so put_inode() releases the inode number through
 	 * free_inode() and also unhashes the slot and returns it to the
 	 * unused list.  free_inode() alone would leave the slot referenced
 	 * forever.
 	 */
 	put_inode(rip);
  } else {
 	/* Fill in the fields of the response message */
 	fs_m_out->RES_INODE_NR = rip->i_num;
 	fs_m_out->RES_MODE = rip->i_mode;
 	fs_m_out->RES_FILE_SIZE_LO = rip->i_size;
 	fs_m_out->RES_UID = rip->i_uid;
 	fs_m_out->RES_GID = rip->i_gid;
 	fs_m_out->RES_DEV = dev;
  }
  return(r);
 }
--- a/servers/apfs/proto.h
+++ b/servers/apfs/proto.h
@ -0,0 +1,104 @@
 #ifndef __PFS_PROTO_H__
 #define __PFS_PROTO_H__
 /* Function prototypes. */
 /* Structs used in prototypes must be declared as such first. */
 struct buf;
 struct inode;
 struct sockaddr_un;
 struct ancillary;
 /* buffer.c */
 _PROTOTYPE( struct buf *get_block, (dev_t dev, ino_t inum)		);
 _PROTOTYPE( void put_block, (dev_t dev, ino_t inum)			);
 /* cache.c */
 _PROTOTYPE( void buf_pool, (void)					);
 /* inode.c */
 _PROTOTYPE( struct inode *alloc_inode, (dev_t dev, mode_t mode)		);
 _PROTOTYPE( void dup_inode, (struct inode *ip)				);
 _PROTOTYPE( struct inode *find_inode, (ino_t numb)			);
 _PROTOTYPE( void free_inode, (struct inode *rip)			);
 _PROTOTYPE( int fs_putnode, (message *fs_m_in, message *fs_m_out)	);
 _PROTOTYPE( void init_inode_cache, (void)				);
 _PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb)		);
 _PROTOTYPE( void put_inode, (struct inode *rip)				);
 _PROTOTYPE( void update_times, (struct inode *rip)			);
 _PROTOTYPE( void wipe_inode, (struct inode *rip)			);
 /* link.c */
 _PROTOTYPE( int fs_ftrunc, (message *fs_m_in, message *fs_m_out)	);
 _PROTOTYPE( int truncate_inode, (struct inode *rip, off_t newsize)	);
 /* main.c */
 _PROTOTYPE( void reply, (endpoint_t who, message *m_out)		);
 /* misc.c */
 _PROTOTYPE( int fs_sync, (message *fs_m_in, message *fs_m_out)		);
 /* mount.c */
 _PROTOTYPE( int fs_unmount, (message *fs_m_in, message *fs_m_out)	);
 /* open.c */
 _PROTOTYPE( int fs_newnode, (message *fs_m_in, message *fs_m_out)	);
 /* read.c */
 _PROTOTYPE( int fs_readwrite, (message *fs_m_in, message *fs_m_out)	);
 /* utility.c */
 _PROTOTYPE( time_t clock_time, (void)					);
 _PROTOTYPE( int no_sys, (message *pfs_m_in, message *pfs_m_out)		);
 /* stadir.c */
 _PROTOTYPE( int fs_stat, (message *fs_m_in, message *fs_m_out)		);
 /* super.c */
 _PROTOTYPE( bit_t alloc_bit, (void)					);
 _PROTOTYPE( void free_bit, (bit_t bit_returned)				);
 /* dev_uds.c */
 _PROTOTYPE( int uds_open, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_close, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_read, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_write, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_ioctl, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_select, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int uds_unsuspend, (endpoint_t m_source, int minor)		);
 _PROTOTYPE( int uds_cancel, (message *dev_m_in, message *dev_m_out)	);
 /* uds.c */
 _PROTOTYPE( void uds_init, (void)					);
 _PROTOTYPE( int do_accept, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_connect, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_listen, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_socket, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_bind, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getsockname, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getpeername, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_shutdown, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_socketpair, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getsockopt_sotype,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getsockopt_peercred,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getsockopt_sndbuf,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_setsockopt_sndbuf,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_getsockopt_rcvbuf,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_setsockopt_rcvbuf,
 				(message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_sendto, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_recvfrom, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_sendmsg, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int do_recvmsg, (message *dev_m_in, message *dev_m_out)	);
 _PROTOTYPE( int perform_connection,
 				(message *dev_m_in, message *dev_m_out,
 				struct sockaddr_un *addr, int minorx,
 				int minory)				);
 _PROTOTYPE( int clear_fds, (int minor, struct ancillary *data)		);
 #endif
--- a/servers/apfs/read.c
+++ b/servers/apfs/read.c
@ -0,0 +1,89 @@
 #include "fs.h"
 #include "buf.h"
 #include <minix/com.h>
 #include "inode.h"
 /*===========================================================================*
 *				fs_readwrite				     *
 *===========================================================================*/
 PUBLIC int fs_readwrite(message *fs_m_in, message *fs_m_out)
 {
 /* Transfer data between a named pipe's buffer and the caller's grant.
  *
  * Request fields read:  m_type (REQ_READ selects reading, anything else
  *                       writing), REQ_INODE_NR, REQ_GRANT, REQ_SEEK_POS_LO,
  *                       REQ_NBYTES.
  * Reply fields written: RES_SEEK_POS_LO (position after the transfer),
  *                       RES_NBYTES (bytes actually transferred).
  *
  * Returns OK or the result of the safecopy on completion; EINVAL when the
  * inode is unknown; EIO when it is not a named pipe; EFBIG when the request
  * is too large; err_code when the inode or its buffer cannot be obtained.
  */
  int r, rw_flag;
  struct buf *bp;
  cp_grant_id_t gid;
  off_t position, f_size;
  unsigned int nrbytes, cum_io;
  mode_t mode_word;
  struct inode *rip;
  ino_t inumb;
  r = OK;
  cum_io = 0;
  inumb = (ino_t) fs_m_in->REQ_INODE_NR;
  /* Find the inode referred */
  if ((rip = find_inode(inumb)) == NULL) return(EINVAL);
  mode_word = rip->i_mode & I_TYPE;
  if (mode_word != I_NAMED_PIPE) return(EIO);
  f_size = rip->i_size;
  /* Get the values from the request message */
  rw_flag = (fs_m_in->m_type == REQ_READ ? READING : WRITING);
  gid = (cp_grant_id_t) fs_m_in->REQ_GRANT;
  position = fs_m_in->REQ_SEEK_POS_LO;
  nrbytes = (unsigned) fs_m_in->REQ_NBYTES;
  /* We can't read beyond the max file position */
  if (nrbytes > MAX_FILE_POS) return(EFBIG);
  if (rw_flag == WRITING) {
 	  /* Check in advance to see if file will grow too big. */
 	  /* Casting nrbytes to signed is safe, because it's guaranteed not to
 	     be beyond max signed value (i.e., MAX_FILE_POS). */
 	  if (position > PIPE_BUF - (signed) nrbytes) return(EFBIG);
  }
  /* NOTE(review): reads are not checked against PIPE_BUF or i_size here, so
   * this relies on the caller passing a position/nrbytes pair that stays
   * inside bp->b_data -- confirm that VFS guarantees this.
   */
  /* Mark inode in use */
  if ((get_inode(rip->i_dev, rip->i_num)) == NULL) return(err_code);
  if ((bp = get_block(rip->i_dev, rip->i_num)) == NULL) {
 	/* Release the reference taken just above; otherwise the inode stays
 	 * marked in use forever.  Save err_code first in case put_inode()
 	 * modifies it.
 	 */
 	r = err_code;
 	put_inode(rip);
 	return(r);
  }
  if (rw_flag == READING) {
 	/* Copy a chunk from the block buffer to user space. */
 	r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) 0,
 		(vir_bytes) (bp->b_data+position), (size_t) nrbytes, D);
  } else {
 	/* Copy a chunk from user space to the block buffer. */
 	r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) 0,
 		(vir_bytes) (bp->b_data+position), (size_t) nrbytes, D);
  }
  if (r == OK) {
 	position += (signed) nrbytes; /* Update position */
 	cum_io += nrbytes;
  }
  fs_m_out->RES_SEEK_POS_LO = position; /* It might change later and the VFS
 					   has to know this value */
  /* On write, update file size and access time. */
  if (rw_flag == WRITING) {
 	  if (position > f_size) rip->i_size = position;
  } else {
 	if(position >= rip->i_size) {
 		/* All data in the pipe is read, so reset pipe pointers */
 		rip->i_size = 0;	/* no data left */
 		position = 0;		/* reset reader(s) */
 	}
  }
  bp->b_bytes = position;	/* remembered in the buffer header */
  if (rw_flag == READING) rip->i_update |= ATIME;
  if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
  fs_m_out->RES_NBYTES = (size_t) cum_io;
  put_inode(rip);
  put_block(rip->i_dev, rip->i_num);
  return(r);
 }
--- a/servers/apfs/stadir.c
+++ b/servers/apfs/stadir.c
@ -0,0 +1,70 @@
 #include "fs.h"
 #include "inode.h"
 #include <string.h>
 #include <sys/stat.h>
 /*===========================================================================*
 *				stat_inode				     *
 *===========================================================================*/
 PRIVATE int stat_inode(
  register struct inode *rip,	/* pointer to inode to stat */
  endpoint_t who_e,		/* Caller endpoint */
  cp_grant_id_t gid		/* grant for the stat buf */
 )
 {
 /* Fill a struct stat from the given inode and copy it into the caller's
  * grant.  Returns the result of sys_safecopyto().
  */
  struct stat sb;
  mode_t kind;
  int is_special;	/* non-zero for character and block special files */
  u32_t nblocks;	/* size rounded up to whole S_BLKSIZE units */

  kind = rip->i_mode & I_TYPE;
  is_special = (kind == I_CHAR_SPECIAL || kind == I_BLOCK_SPECIAL);

  /* Bring pending atime/ctime/mtime changes up to date before reporting. */
  if (rip->i_update) update_times(rip);

  nblocks = rip->i_size / S_BLKSIZE;
  if (rip->i_size % S_BLKSIZE != 0) nblocks++;

  memset(&sb, 0, sizeof(sb));
  sb.st_dev = rip->i_dev;
  sb.st_ino = rip->i_num;
  sb.st_mode = rip->i_mode;
  sb.st_nlink = rip->i_nlinks;
  sb.st_uid = rip->i_uid;
  sb.st_gid = (short int) rip->i_gid;
  sb.st_size = rip->i_size;
  if (is_special) {
 	sb.st_rdev = (dev_t) rip->i_rdev;
  } else {
 	sb.st_rdev = (dev_t) NO_DEV;
 	sb.st_mode &= ~I_REGULAR;	/* wipe out I_REGULAR bit for pipes */
  }
  sb.st_atime = rip->i_atime;
  sb.st_mtime = rip->i_mtime;
  sb.st_ctime = rip->i_ctime;
  sb.st_blksize = PIPE_BUF;
  sb.st_blocks = nblocks;

  /* Hand the result to the caller. */
  return(sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &sb,
 		(size_t) sizeof(sb), D));
 }
 /*===========================================================================*
 *                             fs_stat					     *
 *===========================================================================*/
 PUBLIC int fs_stat(message *fs_m_in, message *fs_m_out)
 {
 /* Stat the inode named by REQ_INODE_NR and copy the result into the grant
  * given in REQ_GRANT on behalf of the message's sender.  Returns EINVAL when
  * the inode is unknown, otherwise whatever stat_inode() returns.
  */
  struct inode *node;
  int status;

  node = find_inode(fs_m_in->REQ_INODE_NR);
  if (node == NULL) return(EINVAL);

  /* Keep the inode marked in use for the duration of the stat. */
  get_inode(node->i_dev, node->i_num);
  status = stat_inode(node, fs_m_in->m_source,
 		(cp_grant_id_t) fs_m_in->REQ_GRANT);
  put_inode(node);

  return(status);
 }
--- a/servers/apfs/super.c
+++ b/servers/apfs/super.c
@ -0,0 +1,75 @@
 /* This file manages the super block table and the related data structures,
 * namely, the bit maps that keep track of which zones and which inodes are
 * allocated and which are free.  When a new inode or zone is needed, the
 * appropriate bit map is searched for a free entry.
 *
 * The entry points into this file are
 *   alloc_bit:       somebody wants to allocate a zone or inode; find one
 *   free_bit:        indicate that a zone or inode is available for allocation
 */
 #include "fs.h"
 #include "buf.h"
 #include "inode.h"
 #include "const.h"
 /*===========================================================================*
 *				alloc_bit				     *
 *===========================================================================*/
 PUBLIC bit_t alloc_bit(void)
 {
 /* Allocate the lowest-numbered free bit in the inode map, mark it as used,
  * bump the 'busy' counter and return the bit number.  Returns NO_BIT when
  * every bit below NR_INODES is already taken.
  */
  bitchunk_t *wptr, *wlim;
  bit_t b;
  unsigned int i, bcount;
  bcount = FS_BITMAP_CHUNKS(NR_INODES); /* Inode map has this many chunks. */
  wlim = &inodemap[bcount]; /* One past the last chunk in inodemap. */
  for (wptr = &inodemap[0]; wptr < wlim; wptr++) {
 	/* Does this word contain a free bit? */
 	if (*wptr == (bitchunk_t) ~0) continue; /* No. Go to next word */
 	/* Find the free bit.  The mask is built in bitchunk_t so that the
 	 * shift never moves a 1 into the sign bit of a plain int, and so that
 	 * it matches the unsigned mask used by free_bit().
 	 */
 	for (i = 0; (*wptr & ((bitchunk_t) 1 << i)) != 0; ++i) {}
 	/* Get inode number */
 	b = (bit_t) ((wptr - &inodemap[0]) * FS_BITCHUNK_BITS + i);
 	/* Don't allocate bits beyond end of map. */
 	if (b >= NR_INODES) break;
 	/* Allocate and return bit number. */
 	*wptr |= (bitchunk_t) 1 << i;
 	/* Mark server 'busy' */
 	busy++;
 	return(b);
  }
  return(NO_BIT);			/* no bit could be allocated */
 }
 /*===========================================================================*
 *				free_bit				     *
 *===========================================================================*/
 PUBLIC void free_bit(bit_t bit_returned)
 {
 /* Return a bit to the inode map: clear bit number 'bit_returned' and
  * decrement the 'busy' counter.  The bit number is not range-checked; it is
  * expected to be a value previously handed out by alloc_bit().
  */
  bitchunk_t *k, mask;
  bit_t bit;
  unsigned word;
  /* Get word offset and bit within offset */
  word = (unsigned) (bit_returned / (bit_t) FS_BITCHUNK_BITS);
  bit = bit_returned % (bit_t) FS_BITCHUNK_BITS;
  /* Unset bit */
  k = &inodemap[word];
  mask = (bitchunk_t) 1 << bit;	/* same mask type as the map itself */
  *k &= ~mask;
  busy--; /* One inode less in use. */
 }
--- a/servers/apfs/table.c
+++ b/servers/apfs/table.c
@ -0,0 +1,82 @@
 /* This file contains the table used to map system call numbers onto the
 * routines that perform them.
 */
 #define _TABLE
 #include "fs.h"
 #include "inode.h"
 #include "buf.h"
 #include "uds.h"
 /* File System Handlers (pfs)
  *
  * Dispatch table for file system requests.  The position of an entry is the
  * call number it serves (the number in the comment next to it); the exact
  * index computation is done by the dispatcher, which is not part of this
  * file.  Slots filled with no_sys are calls this server does not implement.
  * Read and write (slots 19 and 20) share one handler.
  */
 PUBLIC _PROTOTYPE (int (*fs_call_vec[]),
 				(message *fs_m_in, message *fs_m_out) ) = {
        no_sys,             /* 0   not used */
        no_sys,             /* 1   */
        fs_putnode,         /* 2   */
        no_sys,             /* 3   */
        fs_ftrunc,          /* 4   */
        no_sys,             /* 5   */
 	no_sys,             /* 6   */
        no_sys,             /* 7   */
        fs_stat,            /* 8   */
        no_sys,             /* 9   */
        no_sys,             /* 10  */
        no_sys,             /* 11  */
        no_sys,             /* 12  */
        no_sys,	            /* 13  */
        no_sys,             /* 14  */
        fs_unmount,         /* 15  */
 	fs_sync,            /* 16  */
        no_sys,             /* 17  */
        no_sys,	            /* 18  */
        fs_readwrite,	    /* 19  */
        fs_readwrite,	    /* 20  */
        no_sys,             /* 21  */
        no_sys,             /* 22  */
        no_sys,             /* 23  */
        no_sys,             /* 24  */
        no_sys,             /* 25  */
        no_sys,             /* 26  */
        no_sys,             /* 27  */
        no_sys,	            /* 28  */
        fs_newnode,	    /* 29  */
        no_sys,	            /* 30  */
        no_sys,	            /* 31  */
 	no_sys,             /* 32 */
 };
 /* Device Handlers (/dev/uds)
  *
  * Dispatch table for device requests aimed at the UNIX domain socket device.
  * The position of an entry is the call number it serves, as named in the
  * comment next to it.  Slots filled with no_sys are not implemented.  Note
  * that DEV_REOPEN (slot 14) is served by the same handler as DEV_OPEN.
  */
 PUBLIC _PROTOTYPE (int (*dev_call_vec[]),
 				(message *dev_m_in, message *dev_m_out) ) = {
        uds_cancel,         /* 0  CANCEL */
        no_sys,             /* 1   */
        no_sys,             /* 2   */
        no_sys,             /* 3   */
        no_sys,             /* 4   */
        no_sys,             /* 5   */
 	uds_open,           /* 6  DEV_OPEN */
        uds_close,          /* 7  DEV_CLOSE */
        no_sys,             /* 8   */
        no_sys,             /* 9   */
        no_sys,             /* 10 TTY_SETPGRP */
        no_sys,             /* 11 TTY_EXIT */
        uds_select,         /* 12 DEV_SELECT */
        no_sys,             /* 13 DEV_STATUS */
        uds_open,           /* 14 DEV_REOPEN */
        no_sys,             /* 15  */
 	no_sys,             /* 16  */
        no_sys,             /* 17  */
        no_sys,	            /* 18  */
        no_sys,		    /* 19  */
        uds_read,	    /* 20 DEV_READ_S */
        uds_write,          /* 21 DEV_WRITE_S */
        no_sys,             /* 22 DEV_SCATTER_S */
        no_sys,             /* 23 DEV_GATHER_S */
        uds_ioctl,          /* 24 DEV_IOCTL_S */
        no_sys,             /* 25 DEV_MMAP_S */
 };
--- a/servers/apfs/uds.c
+++ b/servers/apfs/uds.c
--- a/servers/apfs/uds.h
+++ b/servers/apfs/uds.h
@ -0,0 +1,250 @@
 #ifndef __PFS_UDS_H__
 #define __PFS_UDS_H__
 /*
 * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
 *
 * Also See...
 *
 *   dev_uds.c, table.c, uds.c
 */
 #include <limits.h>
 #include <sys/types.h>
 #include <sys/ucred.h>
 #include <sys/un.h>
 #include <minix/endpoint.h>
 /* max connection backlog for incoming connections */
 #define UDS_SOMAXCONN 64
 typedef void* filp_id_t;
 /* Ancillary data to be sent along with a message: a set of open files plus
  * the sender's credentials.
  */
 struct ancillary {
 	filp_id_t filps[OPEN_MAX];	/* opaque file pointer handles */
 	int fds[OPEN_MAX];		/* presumably the matching descriptor
 					 * numbers -- TODO confirm in uds.c
 					 */
 	int nfiledes;			/* presumably the number of valid
 					 * entries in the two arrays above
 					 */
 	struct ucred cred;		/* credentials being passed */
 };
 /*
 * Internal State Information for a socket descriptor.
 */
 struct uds_fd {
 /* Flags */
 	enum UDS_STATE {
 		/* This file descriptor is UDS_FREE and can be allocated. */
 		UDS_FREE  = 0,
 		/* OR it is UDS_INUSE and can't be allocated. */
 		UDS_INUSE = 1
 	/* state is set to UDS_INUSE in uds_open(). state is set to
 	 * UDS_FREE in uds_init() and uds_close(). state should be
 	 * checked prior to all operations.
 	 */
 	} state;
 /* Owner Info */
 	/* Socket Owner */
 	endpoint_t owner;
 	/* endpoint for suspend/resume */
 	endpoint_t endpoint;
 /* Pipe Housekeeping */
 	/* inode number on PFS -- each descriptor is backed by 1
 	 * PIPE which is allocated in uds_open() and freed in
 	 * uds_close(). Data is sent/written to a peer's PIPE.
 	 * Data is recv/read from this PIPE.
 	 */
 	ino_t inode_nr;
 	/* position in the PIPE where the data starts */
 	off_t pos;
 	/* size of data in the PIPE */
 	size_t size;
 	/* control read/write, set by uds_open() and shutdown(2).
 	 * Can be set to S_IRUSR|S_IWUSR, S_IRUSR, S_IWUSR, or 0
 	 * for read and write, read only, write only, or neither.
 	 * default is S_IRUSR|S_IWUSR.
 	 */
 	mode_t mode;
 /* Socket Info */
 	/* socket type - SOCK_STREAM, SOCK_DGRAM, or SOCK_SEQPACKET
 	 * Set by uds_ioctl(NWIOSUDSTYPE). It defaults to -1 in
 	 * uds_open(). Any action on a socket with type -1 besides
 	 * uds_ioctl(NWIOSUDSTYPE) and uds_close() will result in
 	 * an error.
 	 */
 	int type;
 	/* queue of pending connections for server sockets.
 	 * connect(2) inserts into and accept(2) removes from the queue.
 	 */
 	int backlog[UDS_SOMAXCONN];
 	/* requested connection backlog size. Set by listen(2).
 	 * Bounds (0 <= backlog_size <= UDS_SOMAXCONN)
 	 * Defaults to UDS_SOMAXCONN which is defined above.
 	 */
 	unsigned char backlog_size;
 	/* index of peer in uds_fd_table for connected sockets.
 	 * -1 is used to mean no peer. Assumptions: peer != -1 means
 	 * connected.
 	 */
 	int peer;
 	/* index of child (client sd returned by accept(2))
 	 * -1 is used to mean no child.
 	 */
 	int child;
 	/* address -- the address the socket is bound to.
 	 * Assumptions: addr.sun_family == AF_UNIX means it's bound.
 	 */
 	struct sockaddr_un addr;
 	/* target -- where DGRAMs are sent to on the next uds_write(). */
 	struct sockaddr_un target;
 	/* source -- address where DGRAMs are from. Used to fill in the
 	 * from address in recvfrom(2) and recvmsg(2).
 	 */
 	struct sockaddr_un source;
 	/* Flag (1 or 0) - listening for incoming connections.
 	 * Defaults to 0. Set to 1 by do_listen().
 	 */
 	int listening;
 	/* stores file pointers and credentials being sent between
 	 * processes with sendmsg(2) and recvmsg(2).
 	 */
 	struct ancillary ancillary_data;
 	/* Holds an errno. This is set when a connected socket is
 	 * closed and we need to pass ECONNRESET on to a suspended
 	 * peer.
 	 */
 	int err;
 /* Suspend/Revive Housekeeping */
 	/* SUSPEND State Flags */
 	enum UDS_SUSPENDED {
 		/* Socket isn't blocked. */
 		UDS_NOT_SUSPENDED     = 0,
 		/* Socket is blocked on read(2) waiting for data to read. */
 		UDS_SUSPENDED_READ    = 1,
 		/* Socket is blocked on write(2) for space to write data. */
 		UDS_SUSPENDED_WRITE   = 2,
 		/* Socket is blocked on connect(2) waiting for the server. */
 		UDS_SUSPENDED_CONNECT = 4,
 		/* Socket is blocked on accept(2) waiting for clients. */
 		UDS_SUSPENDED_ACCEPT  = 8
 	} suspended;
 	/* Flag (1 or 0) - the thing the socket was waiting for is ready.
 	 * If 1, then uds_status() will attempt the operation that
 	 * the socket was blocked on.
 	 */
 	int ready_to_revive;
 	/* i/o grant, saved for later use by suspended procs */
 	cp_grant_id_t io_gr;
 	/* size of i/o grant, saved for later use by suspended procs */
 	size_t io_gr_size;
 	/* Save the call number so that uds_cancel() can unwind the
 	 * call properly.
 	 */
 	int call_nr;
 	/* Save the IOCTL so uds_cancel() knows what got cancelled. */
 	int ioctl;
 	/* Flag (1 or 0) - the system call completed.
 	 * A doc I read said DEV_CANCEL might be called even though
 	 * the operation is finished. We use this variable to
 	 * determine if we should rollback the changes or not.
 	 */
 	int syscall_done;
 /* select() */
 	/* Flag (1 or 0) - the process blocked on select(2). When
 	 * selecting is 1 and I/O happens on this socket, then
 	 * select_proc should be notified.
 	 */
 	int selecting;
 	/* when a select is in progress, we notify() this endpoint
 	 * of new data.
 	 */
 	endpoint_t select_proc;
 	/* Options (SEL_RD, SEL_WR, SEL_ERR) that are requested. */
 	int sel_ops_in;
 	/* Options that are available for this socket. */
 	int sel_ops_out;
 	/* Flag (1 or 0) to be set to one before calling notify().
 	 * uds_status() will use the flag to locate this descriptor.
 	 */
 	int status_updated;
 };
 typedef struct uds_fd uds_fd_t;
 /* File Descriptor Table -- Defined in uds.c */
 EXTERN uds_fd_t uds_fd_table[NR_FDS];
 /*
 * Take message m and get the index in uds_fd_table: the minor device number
 * from the message's DEVICE field, masked with BYTE.  The argument is
 * parenthesized so that any pointer expression can be passed safely.
 */
 #define uds_minor(m)	(minor((dev_t) (m)->DEVICE) & BYTE)
 /*
 * Fill in a reply message.
 *
 *   msg      - pointer to the message to fill in
 *   type     - stored in m_type
 *   endpoint - stored in REP_ENDPT
 *   io_gr    - stored in REP_IO_GRANT
 *   status   - stored in REP_STATUS
 *
 * Only 'msg' is parenthesized in the expansion; each argument is used once.
 */
 #define uds_set_reply(msg,type,endpoint,io_gr,status)	\
 	do {						\
 		(msg)->m_type = type;			\
 		(msg)->REP_ENDPT = endpoint;		\
 		(msg)->REP_IO_GRANT = io_gr;		\
 		(msg)->REP_STATUS = status;		\
 	} while (0)
 /*
 * Fill in a select reply message.
 *
 *   msg   - pointer to the message to fill in
 *   type  - stored in m_type
 *   minor - stored in DEV_MINOR
 *   ops   - stored in DEV_SEL_OPS
 */
 #define uds_sel_reply(msg,type,minor,ops)		\
 	do {						\
 		(msg)->m_type = type;			\
 		(msg)->DEV_MINOR = minor;			\
 		(msg)->DEV_SEL_OPS = ops;			\
 	} while (0)
 #endif
--- a/servers/apfs/utility.c
+++ b/servers/apfs/utility.c
@ -0,0 +1,33 @@
 #include "fs.h"
 /*===========================================================================*
 *				no_sys					     *
 *===========================================================================*/
 PUBLIC int no_sys(message *pfs_m_in, message *pfs_m_out)
 {
 /* Catch-all handler for call numbers this server does not implement.  The
  * offending request number (global 'req_nr') is logged and the call is
  * rejected with EINVAL.  Neither message is touched.
  */
  const int verdict = EINVAL;

  printf("no_sys: invalid call 0x%x to pfs\n", req_nr);
  return(verdict);
 }
 /*===========================================================================*
 *				clock_time				     *
 *===========================================================================*/
 PUBLIC time_t clock_time(void)
 {
 /* This routine returns the time in seconds since 1.1.1970.  MINIX is an
 * astrophysically naive system that assumes the earth rotates at a constant
 * rate and that such things as leap seconds do not exist.
 *
 * The value is the boot time plus the uptime in ticks divided by the clock
 * frequency.  A failing getuptime2() call is fatal.
 */
  int r;
  clock_t uptime;	/* Uptime in ticks */
  time_t boottime;
  if ((r = getuptime2(&uptime, &boottime)) != OK)
 		panic("clock_time: getuptime2 failed: %d", r);
  return( (time_t) (boottime + (uptime/sys_hz())));
 }
--- a/servers/avfs/Makefile
+++ b/servers/avfs/Makefile
@ -0,0 +1,25 @@
 # Makefile for Virtual File System (VFS)
 .include <bsd.own.mk>
 PROG=	vfs
 SRCS=	main.c open.c read.c write.c pipe.c dmap.c \
 	path.c device.c mount.c link.c exec.c \
 	filedes.c stadir.c protect.c time.c \
 	lock.c misc.c utility.c select.c table.c \
 	vnode.c vmnt.c request.c fscall.c \
 	tll.c comm.c worker.c
 .if ${MKCOVERAGE} != "no"
 SRCS+=  gcov.c
 CPPFLAGS+= -DUSE_COVERAGE
 .endif
 DPADD+=	${LIBSYS} ${LIBTIMERS} ${LIBEXEC}
 LDADD+=	-lsys -ltimers -lexec -lmthread
 MAN=
 BINDIR?= /usr/sbin
 INSTALLFLAGS+=	-S 16k
 .include <minix.bootprog.mk>
--- a/servers/avfs/comm.c
+++ b/servers/avfs/comm.c
@ -0,0 +1,163 @@
 #include "fs.h"
 #include "glo.h"
 #include "vmnt.h"
 #include "fproc.h"
 #include <minix/vfsif.h>
 #include <assert.h>
 FORWARD _PROTOTYPE( int sendmsg, (struct vmnt *vmp, struct fproc *rfp)	);
 FORWARD _PROTOTYPE( int queuemsg, (struct vmnt *vmp)			);
 /*===========================================================================*
 *				sendmsg					     *
 *===========================================================================*/
 PRIVATE int sendmsg(struct vmnt *vmp, struct fproc *rfp)
 {
 /* This is the low level function that sends requests to FS processes.
  *
  * The request in rfp->fp_sendrec is tagged with a transaction id derived
  * from the worker thread id and sent asynchronously to the FS endpoint of
  * 'vmp'.  Returns OK on success, EDEADLK when the process would be talking
  * to itself, or the error from asynsend3().
  */
  int r, transid;
  if (vmp->m_fs_e == rfp->fp_endpoint) return(EDEADLK);
  vmp->m_comm.c_cur_reqs++;	/* One more request awaiting a reply */
  transid = rfp->fp_wtid + VFS_TRANSID;
  rfp->fp_sendrec->m_type = TRNS_ADD_ID(rfp->fp_sendrec->m_type, transid);
  r = asynsend3(vmp->m_fs_e, rfp->fp_sendrec, AMF_NOREPLY);
  if (r != OK) {
 	printf("VFS: sendmsg: error sending message. "
 	       "FS_e: %d req_nr: %d err: %d\n", vmp->m_fs_e,
 	       rfp->fp_sendrec->m_type, r);
 	util_stacktrace();
 	/* Nothing was sent, so no reply will ever arrive to account for this
 	 * request: take it out of the in-flight count again.
 	 */
 	vmp->m_comm.c_cur_reqs--;
  }
  return(r);
 }
 /*===========================================================================*
 *				send_work				     *
 *===========================================================================*/
 PUBLIC void send_work(void)
 {
 /* Give every mount table slot a chance to send out one queued request.
  * Nothing is done when no request is waiting ('sending' is zero).
  */
  int slot;

  if (sending != 0) {
 	for (slot = 0; slot < NR_MNTS; slot++)
 		fs_sendmore(&vmnt[slot]);
  }
 }
 /*===========================================================================*
 *				fs_sendmore				     *
 *===========================================================================*/
 PUBLIC void fs_sendmore(struct vmnt *vmp)
 {
 /* Send the request at the head of this mount's queue, if the FS can take
  * one.  Nothing happens when the slot has no FS endpoint, the queue is
  * empty, the FS is already at its request limit, or a back call is in
  * progress.
  */
  struct worker_thread *head;

  if (vmp->m_fs_e == NONE) return;	/* slot has no FS endpoint */

  head = vmp->m_comm.c_req_queue;
  if (head == NULL ||					/* nothing queued */
      vmp->m_comm.c_cur_reqs >= vmp->m_comm.c_max_reqs ||	/* FS is full */
      (vmp->m_flags & VMNT_BACKCALL))			/* hold off for now */
 	return;

  /* Unlink the head of the queue. */
  vmp->m_comm.c_req_queue = head->w_next;
  head->w_next = NULL;

  sending--;
  assert(sending >= 0);

  /* NOTE(review): the result of sendmsg() is dropped here, exactly as in the
   * original; confirm that a failed send cannot leave the worker waiting.
   */
  (void) sendmsg(vmp, head->w_job.j_fp);
 }
 /*===========================================================================*
 *				fs_sendrec				     *
 *===========================================================================*/
 PUBLIC int fs_sendrec(endpoint_t fs_e, message *reqmp)
 {
 /* Send the request in 'reqmp' to the file server with endpoint 'fs_e' and
  * wait for the reply, which is stored back into 'reqmp'.  Returns the reply's
  * m_type, or the send/receive error when communication fails.  Panics when no
  * mounted file system has endpoint 'fs_e'.
  *
  * The global 'force_sync' selects how the exchange is carried out:
  *   0 - asynchronous: send now or queue the request, then suspend this
  *       worker until the reply has arrived
  *   1 - sendrec()
  *   2 - asynsend() followed by receive()
  *   3 - send() followed by receive()
  */
  struct vmnt *vmp;
  int r;
  if ((vmp = find_vmnt(fs_e)) == NULL)
 	panic("Trying to talk to non-existent FS");
  if (!force_sync) {
 	fp->fp_sendrec = reqmp;	/* Where to store request and reply */
 	/* Find out whether we can send right away or have to enqueue */
 	if (	!(vmp->m_flags & VMNT_BACKCALL) &&
 		vmp->m_comm.c_cur_reqs < vmp->m_comm.c_max_reqs) {
 		/* No back call in progress and the FS has room for more.
 		 * NOTE(review): the queue itself is not checked here, so a
 		 * new request may overtake already queued ones -- confirm
 		 * this is intended.
 		 */
 		r = sendmsg(vmp, fp);
 	} else {
 		r = queuemsg(vmp);
 	}
 	self->w_next = NULL;	/* End of list */
 	if (r != OK) return(r);
 	worker_wait();	/* Yield execution until we've received the reply. */
  } else if (force_sync == 1) {
 	int r;	/* shadows the outer 'r'; only used within this branch */
 	if (OK != (r = sendrec(fs_e, reqmp))) {
 		printf("VFS: sendrec failed: %d\n", r);
 		util_stacktrace();
 		return(r);
 	}
  } else if (force_sync == 2) {
 	int r, status;	/* 'r' shadows the outer 'r' */
 	if (OK != (r = asynsend(fs_e, reqmp)) ||
 	    OK != (r = receive(fs_e, reqmp, &status))) {
 		printf("VFS: asynrec failed: %d\n", r);
 		util_stacktrace();
 		return(r);
 	}
  } else if (force_sync == 3) {
 	int r, status;	/* 'r' shadows the outer 'r' */
 	if (OK != (r = send(fs_e, reqmp)) ||
 	    OK != (r = receive(fs_e, reqmp, &status))) {
 		printf("VFS: sendreceive failed: %d\n", r);
 		util_stacktrace();
 		return(r);
 	}
  }
  /* These three codes arrive negated in the reply; flip them back to their
   * positive values before returning them to the caller.
   */
  if (reqmp->m_type == -EENTERMOUNT || reqmp->m_type == -ELEAVEMOUNT ||
      reqmp->m_type == -ESYMLINK) {
 	reqmp->m_type = -reqmp->m_type;
  } else if (force_sync != 0 && reqmp->m_type > 0) {
 	/* XXX: Keep this as long as we're interested in having support
 	 * for synchronous communication. */
 	/* A positive m_type in a synchronous exchange is treated as a call
 	 * made by the FS rather than a reply: serve it, then repeat the
 	 * exchange with the same message buffer.
 	 */
 	nested_fs_call(reqmp);
 	return fs_sendrec(fs_e, reqmp);
  }
  return(reqmp->m_type);
 }
 /*===========================================================================*
 *				queuemsg				     *
 *===========================================================================*/
 PRIVATE int queuemsg(struct vmnt *vmp)
 {
 /* Append the current worker thread ('self') to the tail of the request
  * queue of 'vmp' and count it in 'sending'.  Always returns OK.
  */
  struct worker_thread **link;

  /* Advance to the link that terminates the list: either the queue head
   * itself (empty queue) or the w_next field of the last worker.
   */
  link = &vmp->m_comm.c_req_queue;
  while (*link != NULL)
 	link = &(*link)->w_next;

  *link = self;		/* hook this worker in ... */
  self->w_next = NULL;	/* ... as the new end of the list */

  sending++;
  return(OK);
 }
--- a/servers/avfs/comm.h
+++ b/servers/avfs/comm.h
@ -0,0 +1,12 @@
 #ifndef __VFS_COMM_H__
 #define __VFS_COMM_H__
 /* VFS<->FS communication: request accounting and the queue of requests that
  * are still waiting to be sent to one file server.
  */
 typedef struct {
  int c_max_reqs;	/* Max requests an FS can handle simultaneously */
  int c_cur_reqs;	/* Number of requests the FS is currently handling */
  struct worker_thread *c_req_queue;/* Queue of worker threads waiting to send
 				     * a message, linked through w_next
 				     */
 } comm_t;
 #endif
--- a/servers/avfs/const.h
+++ b/servers/avfs/const.h
@ -0,0 +1,50 @@
 #ifndef __VFS_CONST_H__
 #define __VFS_CONST_H__
 /* Tables sizes */
 #define NR_FILPS         512	/* # slots in filp table */
 #define NR_LOCKS           8	/* # slots in the file locking table */
 #define NR_MNTS           16 	/* # slots in mount table */
 #define NR_VNODES        512	/* # slots in vnode table */
 #define NR_WTHREADS	   8	/* # slots in worker thread table */
 #define NR_NONEDEVS	NR_MNTS	/* # slots in nonedev bitmap */
 /* Miscellaneous constants */
 #define SU_UID 	 ((uid_t) 0)	/* super_user's uid_t */
 #define SYS_UID  ((uid_t) 0)	/* uid_t for system processes and INIT */
 #define SYS_GID  ((gid_t) 0)	/* gid_t for system processes and INIT */
 #define FP_BLOCKED_ON_NONE	0 /* not blocked */
 #define FP_BLOCKED_ON_PIPE	1 /* susp'd on pipe */
 #define FP_BLOCKED_ON_LOCK	2 /* susp'd on lock */
 #define FP_BLOCKED_ON_POPEN	3 /* susp'd on pipe open */
 #define FP_BLOCKED_ON_SELECT	4 /* susp'd on select */
 #define FP_BLOCKED_ON_DOPEN	5 /* susp'd on device open */
 #define FP_BLOCKED_ON_OTHER	6 /* blocked on other process, check
 				     fp_task to find out */
 /* test if the process is blocked on something */
 #define fp_is_blocked(fp)	((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
 #define DUP_MASK        0100	/* mask to distinguish dup2 from dup */
 #define LOOK_UP            0 /* tells search_dir to lookup string */
 #define ENTER              1 /* tells search_dir to make dir entry */
 #define DELETE             2 /* tells search_dir to delete entry */
 #define IS_EMPTY           3 /* tells search_dir to ret. OK or ENOTEMPTY */
 #define SYMLOOP		16
 #define LABEL_MAX	16	/* maximum label size (including '\0'). Should
 				 * not be smaller than 16 or bigger than
 				 * M3_LONG_STRING.
 				 */
 /* Args to dev_io */
 #define VFS_DEV_READ	2001
 #define	VFS_DEV_WRITE	2002
 #define VFS_DEV_IOCTL	2005
 #define VFS_DEV_SELECT	2006
 #endif
--- a/servers/avfs/device.c
+++ b/servers/avfs/device.c
--- a/servers/avfs/dmap.c
+++ b/servers/avfs/dmap.c
@ -0,0 +1,257 @@
 /* This file contains the table with device <-> driver mappings. It also
 * contains some routines to dynamically add and/ or remove device drivers
 * or change mappings.
 */
 #include "fs.h"
 #include <string.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <unistd.h>
 #include <minix/com.h>
 #include <minix/ds.h>
 #include "fproc.h"
 #include "dmap.h"
 #include "param.h"
 /* The order of the entries in the table determines the mapping between major
 * device numbers and device drivers. Character and block devices
 * can be intermixed at random.  The ordering determines the device numbers in
 * /dev. Note that the major device numbers used in /dev are NOT the same as
 * the process numbers of the device drivers. See <minix/dmap.h> for mappings.
 */
 /* The device <-> driver table itself; map_driver() indexes it by major. */
 struct dmap dmap[NR_DEVICES];
 /* Initializer for an unused slot, in struct dmap field order: no_dev and
  * no_dev_io as handlers, NONE as driver endpoint, an empty label, no flags,
  * STYLE_NDEV as style and no select filp.
  */
 #define DT_EMPTY { no_dev, no_dev_io, NONE, "", 0, STYLE_NDEV, NULL }
 /*===========================================================================*
 *				do_mapdriver		 		     *
 *===========================================================================*/
 PUBLIC int do_mapdriver()
 {
 /* Create a device->driver mapping. RS will tell us which major is driven by
  * this driver, what type of device it is (regular, TTY, asynchronous, clone,
  * etc), and its label. This label is registered with DS, and allows us to
  * retrieve the driver's endpoint.
  *
  * Returns EPERM when the caller is not RS, EINVAL when the label is too
  * long, cannot be copied in or is unknown to DS, and otherwise whatever
  * map_driver() returns.
  */
   int r, flags, major;
   endpoint_t endpoint;
   vir_bytes label_vir;
   size_t label_len;
   char label[LABEL_MAX];
   /* Only RS can map drivers. */
   if (who_e != RS_PROC_NR) return(EPERM);
   /* Get the label */
   label_vir = (vir_bytes) m_in.md_label;
   label_len = (size_t) m_in.md_label_len;
   /* Can we store this label plus its terminator? Written without
    * 'label_len+1' so that a huge (e.g. negative, then converted) length
    * cannot wrap around to a small value and slip past the check.
    */
   if (label_len >= sizeof(label)) {
 	printf("VFS: do_mapdriver: label too long\n");
 	return(EINVAL);
   }
   r = sys_vircopy(who_e, D, label_vir, SELF, D, (vir_bytes) label, label_len);
   if (r != OK) {
 	printf("VFS: do_mapdriver: sys_vircopy failed: %d\n", r);
 	return(EINVAL);
   }
   label[label_len] = '\0';	/* Terminate label */
   /* Now we know how the driver is called, fetch its endpoint */
   r = ds_retrieve_label_endpt(label, &endpoint);
   if (r != OK) {
 	printf("VFS: do_mapdriver: label '%s' unknown\n", label);
 	return(EINVAL);
   }
   /* Try to update device mapping. */
   major = m_in.md_major;
   flags = m_in.md_flags;
   return map_driver(label, major, endpoint, m_in.md_style, flags);
 }
 /*===========================================================================*
 *				map_driver		 		     *
 *===========================================================================*/
 PUBLIC int map_driver(label, major, proc_nr_e, style, flags)
 const char *label;		/* name of the driver */
 int major;			/* major number of the device */
 endpoint_t proc_nr_e;		/* process number of the driver */
 int style;			/* style of the device */
 int flags;			/* device flags */
 {
 /* Add a new device driver mapping in the dmap table. If the proc_nr is set to
  * NONE, we're supposed to unmap it.
  *
  * Returns ENODEV when 'major' falls outside the table, EINVAL when the
  * endpoint is not valid (unless DRV_FORCED is set in 'flags') or when 'style'
  * is unknown, and OK otherwise. A NULL 'label' leaves the stored label
  * untouched. A call that returns an error does not modify the entry.
  */
   int slot;
   size_t len;
   struct dmap *dp;
   /* Get pointer to device entry in the dmap table. */
   if (major < 0 || major >= NR_DEVICES) return(ENODEV);
   dp = &dmap[major];
   /* Check if we're supposed to unmap it. */
   if (proc_nr_e == NONE) {
 	dp->dmap_opcl = no_dev;
 	dp->dmap_io = no_dev_io;
 	dp->dmap_driver = NONE;
 	dp->dmap_flags = flags;
 	return(OK);
   }
   /* Check process number of new driver if it was alive before mapping */
   if (! (flags & DRV_FORCED)) {
 	if (isokendpt(proc_nr_e, &slot) != OK)
 		return(EINVAL);
   }
   /* Store driver I/O routines based on type of device. This happens before
    * the label is touched, so that an unknown style is rejected without
    * having changed anything in the entry.
    */
   switch (style) {
     case STYLE_DEV:
 	dp->dmap_opcl = gen_opcl;
 	dp->dmap_io = gen_io;
 	break;
     case STYLE_DEVA:
 	dp->dmap_opcl = gen_opcl;
 	dp->dmap_io = asyn_io;
 	break;
     case STYLE_TTY:
 	dp->dmap_opcl = tty_opcl;
 	dp->dmap_io = gen_io;
 	break;
     case STYLE_CTTY:
 	dp->dmap_opcl = ctty_opcl;
 	dp->dmap_io = ctty_io;
 	break;
     case STYLE_CLONE:
 	dp->dmap_opcl = clone_opcl;
 	dp->dmap_io = gen_io;
 	break;
     case STYLE_CLONE_A:
 	dp->dmap_opcl = clone_opcl;
 	dp->dmap_io = asyn_io;
 	break;
     default:
 	return(EINVAL);
   }
   if (label != NULL) {
 	len = strlen(label);
 	/* 'len' is a size_t; cast it to match the %d conversion. */
 	if (len+1 > sizeof(dp->dmap_label))
 		panic("VFS: map_driver: label too long: %d", (int) len);
 	strcpy(dp->dmap_label, label);
   }
   dp->dmap_driver = proc_nr_e;
   dp->dmap_flags = flags;
   dp->dmap_style = style;
   return(OK);
 }
 /*===========================================================================*
 *				dmap_unmap_by_endpt	 		     *
 *===========================================================================*/
 PUBLIC void dmap_unmap_by_endpt(endpoint_t proc_e)
 {
 /* Walk the whole dmap table and unmap every major that is currently driven
  * by endpoint 'proc_e'. A failing unmap is reported but does not stop the
  * scan.
  */
   int status;
   int major = 0;
   while (major < NR_DEVICES) {
 	if (dmap_driver_match(proc_e, major)) {
 		/* This major belongs to the driver: map it to NONE. */
 		status = map_driver(NULL, major, NONE, 0, 0);
 		if (status != OK)
 			printf("VFS: unmapping driver %d for major %d failed:"
 				" %d\n", proc_e, major, status);
 	}
 	major++;
   }
 }
 /*===========================================================================*
 *		               map_service                                   *
 *===========================================================================*/
 PUBLIC int map_service(struct rprocpub *rpub)
 {
 /* Record the device driver properties of a newly started service. Services
  * without a device number are accepted as-is. Returns OK or the error of
  * the map_driver() call that failed.
  */
   int status;
   /* Nothing to map for a service that is not a driver. */
   if (rpub->dev_nr == NO_DEV)
 	return(OK);
   /* Map the primary major number. */
   status = map_driver(rpub->label, rpub->dev_nr, rpub->endpoint,
 		      rpub->dev_style, rpub->dev_flags);
   /* A second style means the next major belongs to this driver as well. */
   if (status == OK && rpub->dev_style2 != STYLE_NDEV) {
 	status = map_driver(rpub->label, rpub->dev_nr+1, rpub->endpoint,
 			    rpub->dev_style2, rpub->dev_flags);
   }
   return(status);
 }
 /*===========================================================================*
 *				init_dmap		 		     *
 *===========================================================================*/
 PUBLIC void init_dmap()
 {
 /* Initialize the table with empty device <-> driver mappings. */
  int i;
  struct dmap dmap_default = DT_EMPTY;
  for (i = 0; i < NR_DEVICES; i++)
 	dmap[i] = dmap_default;
 }
 /*===========================================================================*
 *				dmap_driver_match	 		     *
 *===========================================================================*/
 PUBLIC int dmap_driver_match(endpoint_t proc, int major)
 {
 /* Return 1 when 'major' is a valid table index whose slot is mapped to a
  * driver (i.e., not NONE) with endpoint 'proc'; return 0 otherwise.
  */
   endpoint_t driver;
   if (major < 0 || major >= NR_DEVICES)
 	return(0);
   driver = dmap[major].dmap_driver;
   if (driver == NONE || driver != proc)
 	return(0);
   return(1);
 }
 /*===========================================================================*
 *				dmap_endpt_up		 		     *
 *===========================================================================*/
 PUBLIC void dmap_endpt_up(endpoint_t proc_e)
 {
 /* The device driver with endpoint proc_e has been restarted. Call dev_up()
  * for every major it drives, so that whoever might be blocking on one of
  * those devices learns that it is 'up' again.
  */
   int major = 0;
   while (major < NR_DEVICES) {
 	if (dmap_driver_match(proc_e, major)) {
 		dev_up(major);
 	}
 	major++;
   }
 }
 /*===========================================================================*
 *				get_dmap		 		     *
 *===========================================================================*/
 PUBLIC struct dmap *get_dmap(endpoint_t proc_e)
 {
 /* Find the dmap entry with the lowest major number that is driven by
  * endpoint 'proc_e'. Returns a pointer to that entry, or NULL when the
  * endpoint drives no major at all.
  */
   struct dmap *found = NULL;
   int major;
   for (major = 0; found == NULL && major < NR_DEVICES; major++) {
 	if (dmap_driver_match(proc_e, major))
 		found = &dmap[major];
   }
   return(found);
 }
--- a/servers/avfs/dmap.h
+++ b/servers/avfs/dmap.h
@ -0,0 +1,28 @@
 #ifndef __VFS_DMAP_H__
 #define __VFS_DMAP_H__
 /*
 dmap.h
 */
 /*===========================================================================*
 *               	 Device <-> Driver Table  			     *
 *===========================================================================*/
 /* Device table.  This table is indexed by major device number.  It provides
  * the link between major device numbers and the routines that process them.
  * The table can be updated dynamically. The field 'dmap_flags' describes an
  * entry's current status and determines what control options are possible.
  */
 extern struct dmap {
   /* Handler selected by map_driver() from the device style (no_dev when
    * unmapped). NOTE(review): 'opcl' presumably stands for open/close.
    */
   int _PROTOTYPE ((*dmap_opcl), (int, dev_t, int, int) );
   /* I/O handler selected by map_driver() from the device style (no_dev_io
    * when unmapped).
    */
   int _PROTOTYPE ((*dmap_io), (int, message *) );
   endpoint_t dmap_driver;	/* endpoint of the driver, NONE if unmapped */
   char dmap_label[LABEL_MAX];	/* label of the driver, '\0' terminated */
   int dmap_flags;		/* device flags as passed to map_driver() */
   int dmap_style;		/* device style as passed to map_driver() */
   /* NOTE(review): presumably the filp involved in a select() on this
    * device -- not used in dmap.c, confirm against the select code.
    */
   struct filp *dmap_sel_filp;
 } dmap[];
 #endif
--- a/servers/avfs/exec.c
+++ b/servers/avfs/exec.c
@ -0,0 +1,707 @@
 /* This file handles the EXEC system call.  It performs the work as follows:
 *    - see if the permissions allow the file to be executed
 *    - read the header and extract the sizes
 *    - fetch the initial args and environment from the user space
 *    - allocate the memory for the new process
 *    - copy the initial stack from PM to the process
 *    - read in the text and data segments and copy to the process
 *    - take care of setuid and setgid bits
 *    - fix up 'mproc' table
 *    - tell kernel about EXEC
 *    - save offset to initial argc (for ps)
 *
 * The entry points into this file are:
 *   pm_exec:	 perform the EXEC system call
 */
 #include "fs.h"
 #include <sys/stat.h>
 #include <minix/callnr.h>
 #include <minix/endpoint.h>
 #include <minix/com.h>
 #include <minix/u64.h>
 #include <a.out.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <dirent.h>
 #include <sys/param.h>
 #include "fproc.h"
 #include "path.h"
 #include "param.h"
 #include "vnode.h"
 #include <minix/vfsif.h>
 #include <assert.h>
 #include <libexec.h>
 #include "exec.h"
 FORWARD _PROTOTYPE( void lock_exec, (void)				);
 FORWARD _PROTOTYPE( void unlock_exec, (void)				);
 FORWARD _PROTOTYPE( int exec_newmem, (int proc_e, vir_bytes text_addr, vir_bytes text_bytes,
 		       vir_bytes data_addr, vir_bytes data_bytes,
 		       vir_bytes tot_bytes, vir_bytes frame_len, int sep_id,
 		       int is_elf, dev_t st_dev, ino_t st_ino, time_t ctime,
 		       char *progname, int new_uid, int new_gid,
 		       vir_bytes *stack_topp, int *load_textp,
 		       int *allow_setuidp)				);
 FORWARD _PROTOTYPE( int is_script, (const char *exec_hdr, size_t exec_len));
 FORWARD _PROTOTYPE( int patch_stack, (struct vnode *vp, char stack[ARG_MAX],
 		       vir_bytes *stk_bytes, char path[PATH_MAX+1])	);
 FORWARD _PROTOTYPE( int insert_arg, (char stack[ARG_MAX], vir_bytes *stk_bytes,
 					char *arg, int replace)		);
 FORWARD _PROTOTYPE( void patch_ptr, (char stack[ARG_MAX], vir_bytes base));
 FORWARD _PROTOTYPE( void clo_exec, (struct fproc *rfp)			);
 FORWARD _PROTOTYPE( int read_seg, (struct vnode *vp, off_t off, int proc_e,
 					int seg, vir_bytes seg_addr,
 					phys_bytes seg_bytes)		);
 FORWARD _PROTOTYPE( int load_aout, (struct exec_info *execi)		);
 FORWARD _PROTOTYPE( int load_elf, (struct exec_info *execi)		);
 FORWARD _PROTOTYPE( int map_header, (char **exec_hdr,
 					const struct vnode *vp)		);
 #define PTRSIZE	sizeof(char *) /* Size of pointers in argv[] and envp[]. */
 /* Array of loaders for different object file formats */
 struct exec_loaders {
 	/* Loads the object described by the exec_info; returns OK on success. */
 	int (*load_object)(struct exec_info *);
 };
 /* pm_exec() tries these loaders in order until one returns OK. The entry
  * with a NULL load_object marks the end of the table.
  */
 PRIVATE const struct exec_loaders exec_loaders[] = {
 	{ load_aout },
 	{ load_elf },
 	{ NULL }
 };
 /* Buffer that map_header() fills with the start of the executable. */
 PRIVATE char hdr[PAGE_SIZE]; /* Assume that header is not larger than a page */
 /*===========================================================================*
 *				lock_exec				     *
 *===========================================================================*/
 PRIVATE void lock_exec(void)
 {
 /* Acquire the exec lock. When the lock is not immediately available, the
  * globals m_in, fp and self are saved before blocking on the lock and put
  * back once it has been obtained. NOTE(review): presumably other worker
  * threads may run and overwrite these globals in the meantime -- confirm.
  * Panics when the lock cannot be obtained.
  */
   message saved_m_in;
   struct fproc *saved_fp;
   struct worker_thread *saved_self;
   if (mutex_trylock(&exec_lock) != 0) {
 	/* Contended: remember our context, wait, and restore it again. */
 	saved_m_in = m_in;
 	saved_fp = fp;
 	saved_self = self;
 	if (mutex_lock(&exec_lock) != 0)
 		panic("Could not obtain lock on exec");
 	m_in = saved_m_in;
 	fp = saved_fp;
 	self = saved_self;
   }
 }
 /*===========================================================================*
 *				unlock_exec				     *
 *===========================================================================*/
 PRIVATE void unlock_exec(void)
 {
 /* Release the exec lock; panics when the mutex cannot be unlocked. */
   int status;
   status = mutex_unlock(&exec_lock);
   if (status != 0) {
 	panic("Could not release lock on exec");
   }
 }
 /*===========================================================================*
 *				pm_exec					     *
 *===========================================================================*/
 PUBLIC int pm_exec(int proc_e, char *path, vir_bytes path_len, char *frame,
 		   vir_bytes frame_len, vir_bytes *pc)
 {
 /* Perform the execve(name, argv, envp) call.  The user library builds a
  * complete stack image, including pointers, args, environ, etc.  The stack
  * is copied to a buffer inside VFS, and then to the new core image.
  *
  * proc_e is the endpoint of the exec'ing process, path/path_len describe the
  * program name that fetch_name() copies into a local buffer, and
  * frame/frame_len describe the initial stack image in the caller. On
  * success the entry point of the new image is stored in *pc and OK is
  * returned; otherwise an error code is returned. The exec lock is held for
  * the whole call, and a vnode that is still held is unlocked and released
  * at pm_execfinal.
  */
   int r, r1, round, slot;
   vir_bytes vsp;
   struct fproc *rfp;
   struct vnode *vp;
   struct vmnt *vmp;
   char *cp;
   static char mbuf[ARG_MAX];	/* buffer for stack and zeroes */
   struct exec_info execi;
   int i;
   char fullpath[PATH_MAX+1];
   struct lookup resolve;
   lock_exec();
   okendpt(proc_e, &slot);
   rfp = fp = &fproc[slot];
   vp = NULL;
   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
   resolve.l_vmnt_lock = VMNT_READ;
   resolve.l_vnode_lock = VNODE_READ;
   /* Get the exec file name. */
   if ((r = fetch_name(path, path_len, 0, fullpath)) != OK)
 	goto pm_execfinal;
   /* Fetch the stack from the user before destroying the old core image. */
   if (frame_len > ARG_MAX) {
 	printf("VFS: pm_exec: stack too big\n");
 	r = ENOMEM; /* stack too big */
 	goto pm_execfinal;
   }
   r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf,
 		   (phys_bytes) frame_len);
   if (r != OK) { /* can't fetch stack (e.g. bad virtual addr) */
 	printf("VFS: pm_exec: sys_datacopy failed\n");
 	goto pm_execfinal;
   }
   /* The default is to keep the original user and group IDs */
   execi.new_uid = rfp->fp_effuid;
   execi.new_gid = rfp->fp_effgid;
   for (round = 0; round < 2; round++) {
 	/* round = 0 (first attempt), or 1 (interpreted script) */
 	/* Save the name of the program: whatever follows the last '/'. */
 	cp = strrchr(fullpath, '/');
 	if (cp != NULL)
 		cp++;
 	else
 		cp = fullpath;
 	strncpy(execi.progname, cp, PROC_NAME_LEN-1);
 	execi.progname[PROC_NAME_LEN-1] = '\0';
 	/* Open executable */
 	if ((vp = eat_path(&resolve, fp)) == NULL) {
 		r = err_code;
 		goto pm_execfinal;
 	}
 	execi.vp = vp;
 	unlock_vmnt(vmp);
 	if ((vp->v_mode & I_TYPE) != I_REGULAR)
 		r = ENOEXEC;
 	else if ((r1 = forbidden(vp, X_BIT)) != OK)
 		r = r1;
 	else
 		r = req_stat(vp->v_fs_e, vp->v_inode_nr, VFS_PROC_NR,
 			     (char *) &(execi.sb), 0, 0);
 	if (r != OK) goto pm_execfinal;
 	if (round == 0) {
 		/* Deal with setuid/setgid executables */
 		if (vp->v_mode & I_SET_UID_BIT) execi.new_uid = vp->v_uid;
 		if (vp->v_mode & I_SET_GID_BIT) execi.new_gid = vp->v_gid;
 	}
 	r = map_header(&execi.hdr, execi.vp);
 	if (r != OK) goto pm_execfinal;
 	/* Only a script found in the first round gets a second round. */
 	if (!is_script(execi.hdr, execi.vp->v_size) || round != 0)
 		break;
 	/* Get fresh copy of the file name. */
 	if ((r = fetch_name(path, path_len, 0, fullpath)) != OK)
 		printf("VFS pm_exec: 2nd fetch_name failed\n");
 	else if ((r = patch_stack(vp, mbuf, &frame_len, fullpath)) != OK)
 		printf("VFS pm_exec: patch_stack failed\n");
 	/* Done with the script itself; fullpath now names the interpreter. */
 	unlock_vnode(vp);
 	put_vnode(vp);
 	vp = NULL;
 	if (r != OK) goto pm_execfinal;
   }
   execi.proc_e = proc_e;
   execi.frame_len = frame_len;
   for (i = 0; exec_loaders[i].load_object != NULL; i++) {
 	r = (*exec_loaders[i].load_object)(&execi);
 	/* Loaded successfully, so no need to try other loaders */
 	if (r == OK) break;
   }
   if (r != OK) {   /* No exec loader could load the object */
 	r = ENOEXEC;
 	goto pm_execfinal;
   }
   /* Save off PC */
   *pc = execi.pc;
   /* Patch up stack and copy it from VFS to new core image. */
   vsp = execi.stack_top;
   vsp -= frame_len;
   patch_ptr(mbuf, vsp);
   if ((r = sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp,
 		   (phys_bytes)frame_len)) != OK) {
 	/* Cast so that the argument matches %lu whatever vir_bytes is. */
 	printf("VFS: datacopy failed (%d) trying to copy to %lu\n", r,
 		(unsigned long) vsp);
 	goto pm_execfinal;
   }
   clo_exec(rfp);
   if (execi.allow_setuid) {
 	rfp->fp_effuid = execi.new_uid;
 	rfp->fp_effgid = execi.new_gid;
   }
pm_execfinal:
   if (vp != NULL) {
 	unlock_vnode(vp);
 	put_vnode(vp);
   }
   unlock_exec();
   return(r);
 }
 /*===========================================================================*
 *				load_aout				     *
 *===========================================================================*/
 PRIVATE int load_aout(struct exec_info *execi)
 {
 /* Try to load the executable described by 'execi' as an a.out object:
  * parse the header, have exec_newmem() set up the new memory map, and read
  * in the text (only when load_text is set) and data segments. Returns OK
  * or the error of the first step that failed.
  */
   int status;
   int hdrlen;
   int sep_id;
   off_t text_off, data_off;
   vir_bytes text_bytes, data_bytes, bss_bytes;
   phys_bytes tot_bytes;		/* total space for program, including gap */
   assert(execi != NULL);
   assert(execi->hdr != NULL);
   assert(execi->vp != NULL);
   /* Read the file header and extract the segment sizes. */
   status = read_header_aout(execi->hdr, execi->vp->v_size, &sep_id,
 			    &text_bytes, &data_bytes, &bss_bytes,
 			    &tot_bytes, &execi->pc, &hdrlen);
   if (status != OK)
 	return(status);
   status = exec_newmem(execi->proc_e, 0 /* text_addr */, text_bytes,
 		       0 /* data_addr */, data_bytes + bss_bytes, tot_bytes,
 		       execi->frame_len, sep_id, 0 /* is_elf */,
 		       execi->vp->v_dev, execi->vp->v_inode_nr,
 		       execi->sb.st_ctime,
 		       execi->progname, execi->new_uid, execi->new_gid,
 		       &execi->stack_top, &execi->load_text,
 		       &execi->allow_setuid);
   if (status != OK) {
 	printf("VFS: load_aout: exec_newmem failed: %d\n", status);
 	return(status);
   }
   /* Text follows the header in the file, data follows the text. */
   text_off = hdrlen;
   data_off = text_off + text_bytes;
   if (execi->load_text) {
 	status = read_seg(execi->vp, text_off, execi->proc_e, T, 0,
 			  text_bytes);
 	if (status != OK)
 		return(status);
   }
   return(read_seg(execi->vp, data_off, execi->proc_e, D, 0, data_bytes));
 }
 /*===========================================================================*
 *				load_elf				     *
 *===========================================================================*/
 PRIVATE int load_elf(struct exec_info *execi)
 {
 /* Try to load the executable described by 'execi' as an ELF object: parse
  * the header, have exec_newmem() set up the new memory map, and read in the
  * text (only when load_text is set) and data segments. Returns OK or the
  * error of the first step that failed.
  */
   int status;
   int sep_id = 0;
   int is_elf = 1;
   phys_bytes tot_bytes = 0;	/* Use default stack size */
   vir_bytes text_vaddr, text_paddr, text_filebytes, text_membytes;
   vir_bytes data_vaddr, data_paddr, data_filebytes, data_membytes;
   off_t text_offset, data_offset;
   assert(execi != NULL);
   assert(execi->hdr != NULL);
   assert(execi->vp != NULL);
   /* Read the file header and extract the segment sizes. */
   status = read_header_elf(execi->hdr, &text_vaddr, &text_paddr,
 			   &text_filebytes, &text_membytes,
 			   &data_vaddr, &data_paddr,
 			   &data_filebytes, &data_membytes,
 			   &execi->pc, &text_offset, &data_offset);
   if (status != OK)
 	return(status);
   status = exec_newmem(execi->proc_e,
 		       trunc_page(text_vaddr), text_membytes,
 		       trunc_page(data_vaddr), data_membytes,
 		       tot_bytes, execi->frame_len, sep_id, is_elf,
 		       execi->vp->v_dev, execi->vp->v_inode_nr,
 		       execi->sb.st_ctime,
 		       execi->progname, execi->new_uid, execi->new_gid,
 		       &execi->stack_top, &execi->load_text,
 		       &execi->allow_setuid);
   if (status != OK) {
 	printf("VFS: load_elf: exec_newmem failed: %d\n", status);
 	return(status);
   }
   /* Read in text and data segments. */
   if (execi->load_text) {
 	status = read_seg(execi->vp, text_offset, execi->proc_e, T,
 			  text_vaddr, text_filebytes);
 	if (status != OK)
 		return(status);
   }
   return(read_seg(execi->vp, data_offset, execi->proc_e, D, data_vaddr,
 		  data_filebytes));
 }
 /*===========================================================================*
 *				exec_newmem				     *
 *===========================================================================*/
 PRIVATE int exec_newmem(
   int proc_e,
   vir_bytes text_addr,
   vir_bytes text_bytes,
   vir_bytes data_addr,
   vir_bytes data_bytes,
   vir_bytes tot_bytes,
   vir_bytes frame_len,
   int sep_id,
   int is_elf,
   dev_t st_dev,
   ino_t st_ino,
   time_t ctime,
   char *progname,
   int new_uid,
   int new_gid,
   vir_bytes *stack_topp,
   int *load_textp,
   int *allow_setuidp
 )
 {
 /* Ask PM, through an EXEC_NEWMEM request, to allocate a new memory map for
  * a process that tries to exec. The parameters are packed into a struct
  * exec_newmem whose address travels in the request. When sendrec() fails
  * its error is returned and the output parameters are left alone; otherwise
  * *stack_topp, *load_textp and *allow_setuidp are filled in from the reply
  * and the reply's m_type is returned as the result.
  */
   int status;
   struct exec_newmem args;
   message msg;
   /* Program identity */
   strncpy(args.progname, progname, sizeof(args.progname)-1);
   args.progname[sizeof(args.progname)-1] = '\0';
   args.st_dev     = st_dev;
   args.st_ino     = st_ino;
   args.enst_ctime = ctime;
   args.new_uid    = new_uid;
   args.new_gid    = new_gid;
   /* Memory layout */
   args.text_addr  = text_addr;
   args.text_bytes = text_bytes;
   args.data_addr  = data_addr;
   args.data_bytes = data_bytes;
   args.tot_bytes  = tot_bytes;
   args.args_bytes = frame_len;
   args.sep_id     = sep_id;
   args.is_elf     = is_elf;
   /* Build and send the request. */
   msg.m_type = EXEC_NEWMEM;
   msg.EXC_NM_PROC = proc_e;
   msg.EXC_NM_PTR = (char *)&args;
   status = sendrec(PM_PROC_NR, &msg);
   if (status != OK)
 	return(status);
   /* Unpack the reply. */
   *stack_topp = msg.m1_i1;
   *load_textp = (msg.m1_i2 & EXC_NM_RF_LOAD_TEXT) ? 1 : 0;
   *allow_setuidp = (msg.m1_i2 & EXC_NM_RF_ALLOW_SETUID) ? 1 : 0;
   return(msg.m_type);
 }
 /*===========================================================================*
 *				is_script				     *
 *===========================================================================*/
 PRIVATE int is_script(const char *exec_hdr, size_t exec_len)
 {
 /* Is Interpreted script? Returns 1 when the first two bytes of 'exec_hdr'
  * are "#!", and 0 otherwise. 'exec_len' is the number of valid bytes in
  * 'exec_hdr'; it is checked first so that no byte beyond it is looked at.
  */
   assert(exec_hdr != NULL);
   if (exec_len < 2) return(0);
   return(exec_hdr[0] == '#' && exec_hdr[1] == '!');
 }
 /*===========================================================================*
 *				patch_stack				     *
 *===========================================================================*/
 PRIVATE int patch_stack(vp, stack, stk_bytes, path)
 struct vnode *vp;		/* pointer for open script file */
 char stack[ARG_MAX];		/* pointer to stack image within VFS */
 vir_bytes *stk_bytes;		/* size of initial stack */
 char path[PATH_MAX+1];		/* path to script file */
 {
 /* Patch the argument vector to include the path name of the script to be
  * interpreted, and all strings on the #! line.  Returns the path name of
  * the interpreter in 'path'.
  *
  * The result is OK on success, ENOMEM when the stack image cannot take the
  * extra arguments, ENOEXEC when the #! line is not a proper line or names
  * no interpreter, or the error of the read request.
  */
   enum { INSERT=FALSE, REPLACE=TRUE };
   int n, r;
   off_t pos;
   char *sp, *interp = NULL;
   u64_t new_pos;
   unsigned int cum_io;
   char buf[_MAX_BLOCK_SIZE];
   /* Make 'path' the new argv[0]. */
   if (!insert_arg(stack, stk_bytes, path, REPLACE)) return(ENOMEM);
   pos = 0;	/* Read from the start of the file */
   /* Issue request */
   r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING,
 		    VFS_PROC_NR, buf, _MAX_BLOCK_SIZE, &new_pos, &cum_io);
   if (r != OK) return(r);
   n = vp->v_size;
   if (n > _MAX_BLOCK_SIZE)
 	n = _MAX_BLOCK_SIZE;
   /* Never look at more bytes than the read actually delivered. */
   if (cum_io < (unsigned int) n)
 	n = (int) cum_io;
   if (n < 2) return ENOEXEC;
   sp = &(buf[2]);				/* just behind the #! */
   n -= 2;
   if (n > PATH_MAX) n = PATH_MAX;
   /* Use the 'path' variable for temporary storage */
   memcpy(path, sp, n);
   if ((sp = memchr(path, '\n', n)) == NULL) /* must be a proper line */
 	return(ENOEXEC);
   /* Move sp backwards through script[], prepending each string to stack. */
   for (;;) {
 	/* skip spaces behind argument. */
 	while (sp > path && (*--sp == ' ' || *sp == '\t')) {}
 	if (sp == path) break;
 	sp[1] = 0;
 	/* Move to the start of the argument. */
 	while (sp > path && sp[-1] != ' ' && sp[-1] != '\t') --sp;
 	interp = sp;
 	if (!insert_arg(stack, stk_bytes, sp, INSERT)) {
 		printf("VFS: patch_stack: insert_arg failed\n");
 		return(ENOMEM);
 	}
   }
   /* The loop found no argument at all (e.g., an empty or blank #! line), so
    * there is no interpreter to run. Bail out instead of handing a NULL
    * pointer to strlen() below.
    */
   if (interp == NULL) return(ENOEXEC);
   /* Round *stk_bytes up to the size of a pointer for alignment constraints. */
   *stk_bytes= ((*stk_bytes + PTRSIZE - 1) / PTRSIZE) * PTRSIZE;
   /* The first string on the line is the interpreter; hand it back in path. */
   if (interp != path)
 	memmove(path, interp, strlen(interp)+1);
   return(OK);
 }
 /*===========================================================================*
 *				insert_arg				     *
 *===========================================================================*/
 PRIVATE int insert_arg(
 char stack[ARG_MAX],		/* pointer to stack image within VFS */
 vir_bytes *stk_bytes,		/* size of initial stack */
 char *arg,			/* argument to prepend/replace as new argv[0] */
 int replace
 )
 {
 /* Patch the stack so that arg will become argv[0].  Be careful, the stack may
  * be filled with garbage, although it normally looks like this:
  *	nargs argv[0] ... argv[nargs-1] NULL envp[0] ... NULL
  * followed by the strings "pointed" to by the argv[i] and the envp[i].  The
  * pointers are really offsets from the start of stack.
  * When 'replace' is true the current argv[0] string is overwritten by arg,
  * otherwise arg is inserted in front of it and nargs is incremented.
  * *stk_bytes is adjusted to the new size of the stack image.
  * Return true iff the operation succeeded.
  */
   int offset, a0, a1, old_bytes = *stk_bytes;
   /* Prepending arg adds at least one string and a zero byte. */
   offset = strlen(arg) + 1;
   a0 = (int) ((char **) stack)[1];	/* argv[0] */
   /* Compare as signed ints. PTRSIZE is an unsigned sizeof expression, so
    * without the cast a negative a0 would be converted to a huge unsigned
    * value, pass the lower bound, and then also pass the signed upper bound.
    */
   if (a0 < (int) (4 * PTRSIZE) || a0 >= old_bytes) return(FALSE);
   a1 = a0;			/* a1 will point to the strings to be moved */
   if (replace) {
 	/* Move a1 to the end of argv[0][] (argv[1] if nargs > 1). */
 	do {
 		if (a1 == old_bytes) return(FALSE);
 		--offset;
 	} while (stack[a1++] != 0);
   } else {
 	offset += PTRSIZE;	/* new argv[0] needs new pointer in argv[] */
 	a0 += PTRSIZE;		/* location of new argv[0][]. */
   }
   /* stack will grow by offset bytes (or shrink by -offset bytes) */
   if ((*stk_bytes += offset) > ARG_MAX) return(FALSE);
   /* Reposition the strings by offset bytes */
   memmove(stack + a1 + offset, stack + a1, old_bytes - a1);
   strcpy(stack + a0, arg);	/* Put arg in the new space. */
   if (!replace) {
 	/* Make space for a new argv[0]. */
 	memmove(stack + 2 * PTRSIZE, stack + 1 * PTRSIZE, a0 - 2 * PTRSIZE);
 	((char **) stack)[0]++;	/* nargs++; */
   }
   /* Now patch up argv[] and envp[] by offset. */
   patch_ptr(stack, (vir_bytes) offset);
   ((char **) stack)[1] = (char *) a0;	/* set argv[0] correctly */
   return(TRUE);
 }
 /*===========================================================================*
 *				patch_ptr				     *
 *===========================================================================*/
 PRIVATE void patch_ptr(
 char stack[ARG_MAX],		/* pointer to stack image within PM */
 vir_bytes base			/* virtual address of stack base inside user */
 )
 {
 /* The stack image built for exec(name, argv, envp) holds arg and env
  * pointers that are relative to the start of the stack. Add 'base' to every
  * non-NULL pointer that follows 'nargs', up to and including the second
  * NULL pointer (the end of envp[]). The scan also stops, silently, when it
  * reaches the end of the ARG_MAX sized image.
  */
   char **slot = (char **) stack + 1;		/* skip 'nargs' */
   char **limit = (char **) &stack[ARG_MAX];
   int nulls_seen = 0;				/* number of 0-pointers seen */
   for (; nulls_seen < 2; slot++) {
 	if (slot >= limit) return;	/* too bad */
 	if (*slot == NULL) {
 		nulls_seen++;
 		continue;
 	}
 	/* Relocate the relative pointer and put it back. */
 	*slot = (char *) ((vir_bytes) *slot + base);
   }
 }
 /*===========================================================================*
 *				read_seg				     *
 *===========================================================================*/
 PRIVATE int read_seg(
 struct vnode *vp, 		/* inode descriptor to read from */
 off_t off,			/* offset in file */
 int proc_e,			/* process number (endpoint) */
 int seg,			/* T, D, or S */
 vir_bytes seg_addr,		/* address to load segment */
 phys_bytes seg_bytes		/* how much is to be transferred? */
 )
 {
 /* Read 'seg_bytes' bytes at file offset 'off' into segment 'seg' of process
  * 'proc_e', starting at 'seg_addr'. A text segment is read into a local
  * buffer one chunk at a time and copied over with sys_vircopy(); a data
  * segment is read with a single request that names 'proc_e' and 'seg_addr'
  * as destination. Returns EIO when the file is too small or a text chunk
  * comes back short; a short data read is only reported on the console.
  *
  * The byte count on read is usually smaller than the segment count, because
  * a segment is padded out to a click multiple, and the data segment is only
  * partially initialized.
  */
   int status;
   unsigned chunk, done;
   u64_t new_pos;
   unsigned int cum_io;
   static char buf[128 * 1024];
   assert((seg == T)||(seg == D));
   /* Make sure that the file is big enough */
   if (vp->v_size < off+seg_bytes) return(EIO);
   if (seg == D) {
 	status = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(off),
 			       READING, proc_e, (char*)seg_addr, seg_bytes,
 			       &new_pos, &cum_io);
 	if (status != OK) {
 		printf("VFS: read_seg: req_readwrite failed (data)\n");
 		return(status);
 	}
 	if (cum_io != seg_bytes)
 		printf("VFS: read_seg segment has not been read properly by exec()\n");
 	return(status);
   }
   if (seg != T)
 	return(OK);
   /* We have to use a copy loop until safecopies support segments */
   for (done = 0; done < seg_bytes; done += chunk) {
 	chunk = seg_bytes - done;
 	if (chunk > sizeof(buf))
 		chunk = sizeof(buf);
 	status = req_readwrite(vp->v_fs_e, vp->v_inode_nr,
 			       cvul64(off+done), READING, VFS_PROC_NR, buf,
 			       chunk, &new_pos, &cum_io);
 	if (status != OK) {
 		printf("VFS: read_seg: req_readwrite failed (text)\n");
 		return(status);
 	}
 	if (cum_io != chunk) {
 		printf(
 	"VFSread_seg segment has not been read properly by exec() \n");
 		return(EIO);
 	}
 	status = sys_vircopy(VFS_PROC_NR, D, (vir_bytes)buf, proc_e,
 			     seg, seg_addr + done, chunk);
 	if (status != OK) {
 		printf("VFS: read_seg: copy failed (text)\n");
 		return(status);
 	}
   }
   return(OK);
 }
 /*===========================================================================*
 *				clo_exec				     *
 *===========================================================================*/
 PRIVATE void clo_exec(struct fproc *rfp)
 {
 /* Close every file descriptor of 'rfp' that is marked in its
  * fp_cloexec_set (the FD_CLOEXEC descriptors). The result of close_fd()
  * is ignored.
  */
   int fd = 0;
   /* Check the file descriptors one by one for presence of FD_CLOEXEC. */
   while (fd < OPEN_MAX) {
 	if (FD_ISSET(fd, &rfp->fp_cloexec_set)) {
 		(void) close_fd(rfp, fd);
 	}
 	fd++;
   }
 }
 /*===========================================================================*
 *				map_header				     *
 *===========================================================================*/
 PRIVATE int map_header(char **exec_hdr, const struct vnode *vp)
 {
 /* Read the start of the file behind 'vp' (at most PAGE_SIZE bytes) into the
  * static header buffer and store the buffer's address in *exec_hdr.
  * Returns OK, or the error of the read request, in which case *exec_hdr is
  * left untouched.
  */
   int r;
   u64_t new_pos;
   unsigned int cum_io;
   off_t pos;
   pos = 0;	/* Read from the start of the file */
   /* The buffer is shared by all execs. Clear it first, so that a file that
    * is shorter than a page does not leave bytes of a previously read header
    * behind for the header parsers to look at.
    */
   memset(hdr, 0, sizeof(hdr));
   r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, cvul64(pos), READING,
 		    VFS_PROC_NR, hdr, MIN(vp->v_size, PAGE_SIZE),
 		    &new_pos, &cum_io);
   if (r != OK) {
 	printf("VFS: exec: map_header: req_readwrite failed\n");
 	return(r);
   }
   *exec_hdr = hdr;
   return(OK);
 }
--- a/servers/avfs/exec.h
+++ b/servers/avfs/exec.h
@ -0,0 +1,19 @@
 #ifndef _VFS_EXEC_H_
 #define _VFS_EXEC_H_ 1
 /* Everything the object loaders need to know about one exec request.
  * pm_exec() fills in the request side (proc_e, hdr, frame_len, new_uid,
  * new_gid, vp, sb, progname); the loaders fill in pc, stack_top, load_text
  * and allow_setuid.
  */
 struct exec_info {
     int  proc_e;			/* Process endpoint */
     char *hdr;				/* Exec file's header */
     vir_bytes pc;			/* Entry point of exec file */
     vir_bytes stack_top;		/* Top of the stack */
     vir_bytes frame_len;		/* Stack size */
     uid_t new_uid;			/* Process UID after exec */
     gid_t new_gid;			/* Process GID after exec */
     int load_text;			/* Load text section? */
     int allow_setuid;			/* Allow setuid execution? */
     struct vnode *vp;			/* Exec file's vnode */
     struct stat sb;			/* Exec file's stat structure */
     char progname[PROC_NAME_LEN];	/* Program name */
 };
 #endif /* !_VFS_EXEC_H_ */
--- a/servers/avfs/file.h
+++ b/servers/avfs/file.h
@ -0,0 +1,48 @@
 #ifndef __VFS_FILE_H__
 #define __VFS_FILE_H__
 /* This is the filp table.  It is an intermediary between file descriptors and
 * inodes.  A slot is free if filp_count == 0.
 */
 EXTERN struct filp {
   mode_t filp_mode;		/* RW bits, telling how file is opened */
   int filp_flags;		/* flags from open and fcntl */
   int filp_state;		/* state for crash recovery */
   int filp_count;		/* how many file descriptors share this slot?*/
   struct vnode *filp_vno;	/* vnode belonging to this file */
   u64_t filp_pos;		/* file position */
   mutex_t filp_lock;		/* lock to gain exclusive access */
   struct fproc *filp_softlock;	/* if not NULL; this filp didn't lock the
 				 * vnode. Another filp already holds a lock
 				 * for this thread */
   /* the following fields are for select() and are owned by the generic
    * select() code (i.e., fd-type-specific select() code can't touch these).
    */
   int filp_selectors;		/* select()ing processes blocking on this fd */
   int filp_select_ops;		/* interested in these SEL_* operations */
   int filp_select_flags;	/* Select flags for the filp */
   /* following are for fd-type-specific select() */
   int filp_pipe_select_ops;	/* NOTE(review): presumably the operations
 				 * selected on a pipe -- confirm */
 } filp[NR_FILPS];
 #define FILP_CLOSED	0	/* filp_mode: associated device closed */
 /* Values for filp_state.
  * NOTE(review): assumed from the matching 'crash recovery' field comment and
  * the FS_ prefix -- confirm.
  */
 #define FS_NORMAL	0	/* file descriptor can be used normally */
 #define FS_NEEDS_REOPEN	1	/* file descriptor needs to be re-opened */
 /* Select flag bits (octal).
  * NOTE(review): presumably stored in filp_select_flags -- confirm.
  */
 #define FSF_UPDATE	001	/* The driver should be informed about new
 				 * state.
 				 */
 #define FSF_BUSY	002	/* Select operation sent to driver but no
 				 * reply yet.
 				 */
 #define FSF_RD_BLOCK	010	/* Read request is blocking, the driver should
 				 * keep state.
 				 */
 #define FSF_WR_BLOCK	020	/* Write request is blocking */
 #define FSF_ERR_BLOCK	040	/* Exception request is blocking */
 #define FSF_BLOCKED	070	/* FSF_RD_BLOCK | FSF_WR_BLOCK | FSF_ERR_BLOCK */
 #endif
--- a/servers/avfs/filedes.c
+++ b/servers/avfs/filedes.c
@ -0,0 +1,556 @@
 /* This file contains the procedures that manipulate file descriptors.
 *
 * The entry points into this file are
 *   get_fd:	    look for free file descriptor and free filp slots
 *   get_filp:	    look up the filp entry for a given file descriptor
 *   find_filp:	    find a filp slot that points to a given vnode
 *   invalidate:    invalidate a filp and associated fd's, only let close()
 *                  happen on it
 *   do_verify_fd:  verify whether the given file descriptor is valid for
 *                  the given endpoint.
 *   do_set_filp:   marks a filp as in-flight.
 *   do_copy_filp:  copies a filp to another endpoint.
 *   do_put_filp:   marks a filp as not in-flight anymore.
 *   do_cancel_fd:  cancel the transaction when something goes wrong for
 *                  the receiver.
 */
 #include <sys/select.h>
 #include <minix/callnr.h>
 #include <minix/u64.h>
 #include <assert.h>
 #include "fs.h"
 #include "file.h"
 #include "fproc.h"
 #include "vnode.h"
 FORWARD _PROTOTYPE( filp_id_t verify_fd, (endpoint_t ep, int fd)	);
 #if LOCK_DEBUG
 /*===========================================================================*
 *				check_filp_locks_by_me			     *
 *===========================================================================*/
 PUBLIC void check_filp_locks_by_me(void)
 {
 /* Debugging aid: walk the whole filp table and panic if the calling thread
  * turns out to still own one of the filp locks (trylock reports -EDEADLK).
  * Locks that were free and got acquired by the probe are released again.
  */
  int i, status;
  struct filp *slot;
  for (i = 0; i < NR_FILPS; i++) {
 	slot = &filp[i];
 	status = mutex_trylock(&slot->filp_lock);
 	if (status == 0) {
 		/* The probe itself took the lock; give it back. */
 		mutex_unlock(&slot->filp_lock);
 		continue;
 	}
 	if (status == -EDEADLK)
 		panic("Thread %d still holds filp lock on filp %p call_nr=%d\n",
 		      mthread_self(), slot, call_nr);
  }
 }
 #endif
 /*===========================================================================*
 *				check_filp_locks			     *
 *===========================================================================*/
 PUBLIC void check_filp_locks(void)
 {
 /* Probe every filp lock with trylock. A lock that can be taken is released
  * again right away; a lock reported as busy is counted. Any other trylock
  * result, or a non-zero number of busy locks, is fatal.
  */
  int i, status, busy = 0;
  struct filp *slot;
  for (i = 0; i < NR_FILPS; i++) {
 	slot = &filp[i];
 	status = mutex_trylock(&slot->filp_lock);
 	if (status == 0) {
 		/* Probe succeeded, so nobody held it; undo our lock. */
 		mutex_unlock(&slot->filp_lock);
 	} else if (status == -EBUSY) {
 		/* Somebody still owns this lock. */
 		busy++;
 	} else {
 		panic("filp_lock weird state");
 	}
  }
  if (busy != 0) panic("locked filps");
 #if 0
  else printf("check_filp_locks OK\n");
 #endif
 }
 /*===========================================================================*
 *				init_filps					     *
 *===========================================================================*/
 PUBLIC void init_filps(void)
 {
 /* Set up the mutex of every slot in the filp table. */
  int i;
  for (i = 0; i < NR_FILPS; i++)
 	mutex_init(&filp[i].filp_lock, NULL);
 }
 /*===========================================================================*
 *				get_fd					     *
 *===========================================================================*/
 PUBLIC int get_fd(int start, mode_t bits, int *k, struct filp **fpt)
 {
 /* Locate an unused file descriptor (numbered 'start' or higher) in the
  * caller's descriptor table and store it in '*k'. Unless 'fpt' is NULL, also
  * locate an unused filp slot, take its filp lock, reset its fields (mode set
  * to 'bits') and hand it back through '*fpt'. Neither the descriptor nor the
  * filp is entered into the process' tables here, since the open() or creat()
  * may yet fail.
  * Returns OK, EMFILE (no free descriptor) or ENFILE (no free filp slot).
  */
  int fd;
  struct filp *slot;
  /* A descriptor is free when it has no filp and is not flagged as in use. */
  for (fd = start; fd < OPEN_MAX; fd++) {
 	if (fp->fp_filp[fd] != NULL) continue;
 	if (FD_ISSET(fd, &fp->fp_filp_inuse)) continue;
 	*k = fd;
 	break;
  }
  /* Ran off the end of the table: nothing available. */
  if (fd >= OPEN_MAX) return(EMFILE);
  /* Caller only wanted a descriptor number. */
  if (fpt == NULL) return(OK);
  /* Linear search for a filp slot that is unused and whose lock we can get. */
  for (slot = &filp[0]; slot < &filp[NR_FILPS]; slot++) {
 	assert(slot->filp_count >= 0);
 	if (slot->filp_count != 0) continue;
 	if (mutex_trylock(&slot->filp_lock) != 0) continue;
 	if (verbose) printf("get_fd: locking filp=%p\n", slot);
 	slot->filp_mode = bits;
 	slot->filp_flags = 0;
 	slot->filp_state = FS_NORMAL;
 	slot->filp_pos = cvu64(0);
 	slot->filp_softlock = NULL;
 	slot->filp_selectors = 0;
 	slot->filp_select_ops = 0;
 	slot->filp_select_flags = 0;
 	slot->filp_pipe_select_ops = 0;
 	*fpt = slot;
 	return(OK);
  }
  /* Every slot is taken (or locked): the filp table is full. */
  return(ENFILE);
 }
 /*===========================================================================*
 *				get_filp				     *
 *===========================================================================*/
 PUBLIC struct filp *get_filp(fild, locktype)
 int fild;			/* file descriptor */
 tll_access_t locktype;
 {
 /* Look up descriptor 'fild' in the calling process' table; thin wrapper
  * around get_filp2() using the global 'fp'.
  */
  struct filp *result;
  result = get_filp2(fp, fild, locktype);
  return(result);
 }
 /*===========================================================================*
 *				get_filp2				     *
 *===========================================================================*/
 PUBLIC struct filp *get_filp2(rfp, fild, locktype)
 register struct fproc *rfp;
 int fild;			/* file descriptor */
 tll_access_t locktype;
 {
 /* Translate descriptor 'fild' of process 'rfp' into its filp pointer. On
  * success the filp is locked (via lock_filp with 'locktype') and returned.
  * On failure NULL is returned and err_code is EBADF, or EIO when the
  * descriptor has no filp but is still flagged as in use.
  */
  struct filp *entry;
  err_code = EBADF;
  if (!(fild >= 0 && fild < OPEN_MAX)) return(NULL);
  entry = rfp->fp_filp[fild];
  if (entry == NULL) {
 	/* The filedes is not there, but is not closed either. */
 	if (FD_ISSET(fild, &rfp->fp_filp_inuse)) err_code = EIO;
 	return(NULL);
  }
  lock_filp(entry, locktype);
  return(entry);
 }
 /*===========================================================================*
 *				find_filp				     *
 *===========================================================================*/
 PUBLIC struct filp *find_filp(struct vnode *vp, mode_t bits)
 {
 /* Return the first in-use filp slot that refers to vnode 'vp' and whose mode
  * has at least one of the bits in 'bits' set, or NULL if there is none. Used
  * for determining whether somebody is still interested in either end of a
  * pipe, and when opening a FIFO to find a partner to share a filp (and thus
  * the file position) with. Plain linear search through the filp table.
  */
  int i;
  struct filp *cand;
  for (i = 0; i < NR_FILPS; i++) {
 	cand = &filp[i];
 	if (cand->filp_count == 0) continue;		/* slot is free */
 	if (cand->filp_vno != vp) continue;		/* other vnode */
 	if ((cand->filp_mode & bits) == 0) continue;	/* wrong mode */
 	return(cand);
  }
  /* No matching filp in the table. */
  return(NULL);
 }
 /*===========================================================================*
 *				invalidate				     *
 *===========================================================================*/
 PUBLIC int invalidate(struct filp *fp)
 {
 /* Invalidate filp. fp_filp_inuse is not cleared, so filp can't be reused
   until it is closed first. */
  int f, fd, n = 0;
  for(f = 0; f < NR_PROCS; f++) {
 	if(fproc[f].fp_pid == PID_FREE) continue;
 	for(fd = 0; fd < OPEN_MAX; fd++) {
 		if(fproc[f].fp_filp[fd] && fproc[f].fp_filp[fd] == fp) {
 			fproc[f].fp_filp[fd] = NULL;
 			n++;
 		}
 	}
  }
  return(n);	/* Report back how often this filp has been invalidated. */
 }
 /*===========================================================================*
 *				lock_filp				     *
 *===========================================================================*/
 PUBLIC void lock_filp(filp, locktype)
 struct filp *filp;
 tll_access_t locktype;
 {
 /* Lock the vnode behind 'filp' with 'locktype' and then the filp itself.
  * If the calling thread already holds the vnode lock, the vnode is not locked
  * again; instead the filp is marked as holding a 'soft' lock owned by the
  * current process. If the filp mutex is contended, this thread blocks; the
  * globals m_in, fp and self are saved before and restored after the blocking
  * call.
  */
  message org_m_in;
  struct fproc *org_fp;
  struct worker_thread *org_self;
  struct vnode *vp;
  assert(filp->filp_count > 0);
  vp = filp->filp_vno;
  assert(vp != NULL);
  if (verbose)
 	printf("lock_filp: filp=%p locking vnode %p with locktype %d\n", filp,
 		vp, locktype);
  /* Lock vnode only if we haven't already locked it. If already locked by us,
   * we're allowed to have one additional 'soft' lock. */
  if (tll_locked_by_me(&vp->v_lock)) {
 	assert(filp->filp_softlock == NULL);
 	filp->filp_softlock = fp;
  } else {
 	lock_vnode(vp, locktype);
  }
  assert(vp->v_ref_count > 0);	/* vnode still in use? */
  assert(filp->filp_vno == vp);	/* vnode still what we think it is? */
  assert(filp->filp_count > 0); /* filp still in use? */
  /* First try to get filp lock right off the bat */
  if (mutex_trylock(&filp->filp_lock) != 0) {
 	/* Already in use, let's wait for our turn */
 	org_m_in = m_in;
 	org_fp = fp;
 	org_self = self;
 	/* The lock call must not live inside assert(): with NDEBUG the whole
 	 * expression would vanish and the filp would never be locked.
 	 */
 	if (mutex_lock(&filp->filp_lock) != 0)
 		panic("unable to obtain lock on filp");
 	m_in = org_m_in;
 	fp = org_fp;
 	self = org_self;
  }
  assert(filp->filp_count > 0);	/* Yet again; filp still in use? */
 }
 /*===========================================================================*
 *				unlock_filp				     *
 *===========================================================================*/
 PUBLIC void unlock_filp(filp)
 struct filp *filp;
 {
 /* Release the locks taken by lock_filp(): the vnode lock (only when the filp
  * is still in use and holds a real, non-soft lock on the vnode) and then the
  * filp mutex. Any soft lock marker is cleared.
  */
  /* If this filp holds a soft lock on the vnode, we must be the owner */
  if (filp->filp_softlock != NULL)
 	assert(filp->filp_softlock == fp);
  if (filp->filp_count > 0) {
 	/* Only unlock vnode if filp is still in use */
 	/* and if we don't hold a soft lock */
 	if (filp->filp_softlock == NULL) {
 		assert(tll_islocked(&(filp->filp_vno->v_lock)));
 		unlock_vnode(filp->filp_vno);
 	}
  }
  filp->filp_softlock = NULL;
  /* Keep the unlock outside assert(): with NDEBUG an asserted call would be
   * compiled out and the filp would stay locked forever.
   */
  if (mutex_unlock(&filp->filp_lock) != 0)
 	panic("unable to release lock on filp");
 }
 /*===========================================================================*
 *				unlock_filps				     *
 *===========================================================================*/
 PUBLIC void unlock_filps(filp1, filp2)
 struct filp *filp1;
 struct filp *filp2;
 {
 /* Unlock two filps that are tied to the same vnode. As a thread can lock a
  * vnode only once, unlocking the vnode twice would result in an error; the
  * vnode is therefore unlocked a single time (and only when both filps are
  * still in use). Both filp mutexes are released, filp2 first.
  */
  /* No NULL pointers and not equal */
  assert(filp1);
  assert(filp2);
  assert(filp1 != filp2);
  /* Must be tied to the same vnode and not NULL */
  assert(filp1->filp_vno == filp2->filp_vno);
  assert(filp1->filp_vno != NULL);
  if (filp1->filp_count > 0 && filp2->filp_count > 0) {
 	/* Only unlock vnode if filps are still in use */
 	unlock_vnode(filp1->filp_vno);
  }
  filp1->filp_softlock = NULL;
  filp2->filp_softlock = NULL;
  /* The unlock calls must be executed even when assertions are disabled, so
   * they are checked explicitly instead of being wrapped in assert().
   */
  if (mutex_unlock(&filp2->filp_lock) != 0)
 	panic("unable to release lock on filp");
  if (mutex_unlock(&filp1->filp_lock) != 0)
 	panic("unable to release lock on filp");
 }
 /*===========================================================================*
 *				verify_fd				     *
 *===========================================================================*/
 PRIVATE filp_id_t verify_fd(ep, fd)
 endpoint_t ep;
 int fd;
 {
 /* Check that 'fd' is a valid file descriptor of endpoint 'ep'. If so, the
  * matching filp is returned, locked through get_filp2() with VNODE_READ;
  * otherwise (bad endpoint or bad descriptor) NULL is returned.
  */
  int slot;
  if (isokendpt(ep, &slot) == OK)
 	return(get_filp2(&fproc[slot], fd, VNODE_READ));
  return(NULL);
 }
 /*===========================================================================*
 *                              do_verify_fd                                 *
 *===========================================================================*/
 PUBLIC int do_verify_fd(void)
 {
 /* Verify the descriptor named in the request (USER_ENDPT, COUNT) and put the
  * resulting filp pointer (possibly NULL) in the reply's ADDRESS field. The
  * filp lock obtained by verify_fd() is dropped again before returning.
  */
  struct filp *found;
  found = (struct filp *) verify_fd(m_in.USER_ENDPT, m_in.COUNT);
  m_out.ADDRESS = (void *) found;
  if (found == NULL) return(EINVAL);
  unlock_filp(found);
  return(OK);
 }
 /*===========================================================================*
 *                              set_filp                                     *
 *===========================================================================*/
 PUBLIC int set_filp(sfilp)
 filp_id_t sfilp;
 {
 /* Take one extra reference on 'sfilp' while holding its lock. Returns EINVAL
  * for a NULL filp, OK otherwise.
  */
  if (sfilp != NULL) {
 	lock_filp(sfilp, VNODE_READ);
 	sfilp->filp_count += 1;
 	unlock_filp(sfilp);
 	return(OK);
  }
  return(EINVAL);
 }
 /*===========================================================================*
 *                              do_set_filp                                  *
 *===========================================================================*/
 PUBLIC int do_set_filp(void)
 {
  return set_filp((filp_id_t) m_in.ADDRESS);
 }
 /*===========================================================================*
 *                              copy_filp                                    *
 *===========================================================================*/
 PUBLIC int copy_filp(to_ep, cfilp)
 endpoint_t to_ep;
 filp_id_t cfilp;
 {
 /* Install filp 'cfilp' in the first free descriptor of endpoint 'to_ep',
  * mark that descriptor as in use and bump the filp's reference count.
  * Returns the descriptor number on success, EINVAL for a NULL filp or an
  * invalid endpoint, and EMFILE when the descriptor table is full.
  */
  int fd;
  int slot;
  struct fproc *rfp;
  /* Reject a NULL filp up front, like set_filp() and put_filp() do; without
   * this check it would be stored and then dereferenced below.
   */
  if (cfilp == NULL) return(EINVAL);
  if (isokendpt(to_ep, &slot) != OK) return(EINVAL);
  rfp = &fproc[slot];
  /* Find an open slot in fp_filp */
  for (fd = 0; fd < OPEN_MAX; fd++) {
 	if (rfp->fp_filp[fd] == NULL &&
 	    !FD_ISSET(fd, &rfp->fp_filp_inuse)) {
 		/* Found a free slot, add descriptor */
 		FD_SET(fd, &rfp->fp_filp_inuse);
 		rfp->fp_filp[fd] = cfilp;
 		rfp->fp_filp[fd]->filp_count++;
 		return(fd);
 	}
  }
  /* File descriptor table is full */
  return(EMFILE);
 }
 /*===========================================================================*
 *                              do_copy_filp                                 *
 *===========================================================================*/
 PUBLIC int do_copy_filp(void)
 {
  return copy_filp(m_in.USER_ENDPT, (filp_id_t) m_in.ADDRESS);
 }
 /*===========================================================================*
 *                              put_filp                                     *
 *===========================================================================*/
 PUBLIC int put_filp(pfilp)
 filp_id_t pfilp;
 {
 /* Drop one reference on 'pfilp': lock it with VNODE_OPCL and close it
  * (close_filp() also unlocks the filp). Returns EINVAL for a NULL filp,
  * OK otherwise.
  */
  if (pfilp == NULL) return EINVAL;
  lock_filp(pfilp, VNODE_OPCL);
  close_filp(pfilp);
  return(OK);
 }
 /*===========================================================================*
 *                              do_put_filp                                  *
 *===========================================================================*/
 PUBLIC int do_put_filp(void)
 {
  return put_filp((filp_id_t) m_in.ADDRESS);
 }
 /*===========================================================================*
 *                             cancel_fd				     *
 *===========================================================================*/
 PUBLIC int cancel_fd(ep, fd)
 endpoint_t ep;
 int fd;
 {
 /* Undo the installation of descriptor 'fd' in endpoint 'ep': clear its
  * in-use bit, drop one reference on the filp and empty the descriptor slot.
  * Returns 'fd' on success and EINVAL when the endpoint or descriptor is not
  * valid, or when the filp's reference count is already zero.
  */
  int slot;
  struct fproc *rfp;
  struct filp *target;
  if (isokendpt(ep, &slot) != OK) return(EINVAL);
  rfp = &fproc[slot];
  /* verify_fd() hands back the filp locked, or NULL for a bad descriptor. */
  target = (struct filp *) verify_fd(ep, fd);
  if (target == NULL) {
 	/* File descriptor is not valid for the endpoint. */
 	return(EINVAL);
  }
  /* Found a valid descriptor, remove it */
  FD_CLR(fd, &rfp->fp_filp_inuse);
  if (rfp->fp_filp[fd]->filp_count == 0) {
 	unlock_filp(target);
 	printf("VFS: filp_count for slot %d fd %d already zero", slot,
 	      fd);
 	return(EINVAL);
  }
  rfp->fp_filp[fd]->filp_count--;
  rfp->fp_filp[fd] = NULL;
  unlock_filp(target);
  return(fd);
 }
 /*===========================================================================*
 *                              do_cancel_fd                                 *
 *===========================================================================*/
 PUBLIC int do_cancel_fd(void)
 {
 /* Request handler: cancel descriptor COUNT of the endpoint in USER_ENDPT. */
  int result;
  result = cancel_fd(m_in.USER_ENDPT, m_in.COUNT);
  return(result);
 }
 /*===========================================================================*
 *				close_filp				     *
 *===========================================================================*/
 PUBLIC void close_filp(f)
 struct filp *f;
 {
 /* Close a file. Will also unlock filp when done.
  * The caller must hold both the filp lock and the lock on the filp's vnode
  * (checked by the assertions below). One reference on the filp is dropped;
  * when it was the last one, the vnode reference is released as well. On
  * every non-panicking path the vnode and the filp are unlocked on return.
  */
  int mode_word, rw;
  dev_t dev;
  struct vnode *vp;
  /* Must be locked */
  /* NOTE(review): this assertion calls mutex_trylock(); it relies on trylock
   * reporting -EDEADLK when the caller already owns the mutex.
   */
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
  assert(tll_islocked(&f->filp_vno->v_lock));
  vp = f->filp_vno;
  /* Device close processing only happens when the last reference goes away
   * and the device has not been marked closed already.
   */
  if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
 	/* Check to see if the file is special. */
 	mode_word = vp->v_mode & I_TYPE;
 	if (mode_word == I_CHAR_SPECIAL || mode_word == I_BLOCK_SPECIAL) {
 		dev = (dev_t) vp->v_sdev;
 		if (mode_word == I_BLOCK_SPECIAL)  {
 			/* The flush request is issued under the block special
 			 * file lock.
 			 */
 			lock_bsf();
 			if (vp->v_bfs_e == ROOT_FS_E) {
 				/* Invalidate the cache unless the special is
 				 * mounted. Assume that the root filesystem's
 				 * is open only for fsck.
 				 */
 				req_flush(vp->v_bfs_e, dev);
 			}
 			unlock_bsf();
 		}
 		/* Do any special processing on device close. */
 		/* The second argument is the index of 'f' in the filp table. */
 		(void) dev_close(dev, f-filp); /* Ignore any errors, even
 						 * SUSPEND. */
 		f->filp_mode = FILP_CLOSED;
 	}
  }
  /* If the inode being closed is a pipe, release everyone hanging on it. */
  if (vp->v_pipe == I_PIPE) {
 	/* Pick the opposite operation of the end that is being closed. */
 	rw = (f->filp_mode & R_BIT ? WRITE : READ);
 	release(vp, rw, NR_PROCS);
  }
  /* If a write has been done, the inode is already marked as DIRTY. */
  if (--f->filp_count == 0) {
 	if (vp->v_pipe == I_PIPE) {
 		/* Last reader or writer is going. Tell PFS about latest
 		 * pipe size.
 		 */
 		truncate_vnode(vp, vp->v_size);
 	}
 	/* Last reference: unlock the vnode and drop our reference to it. */
 	unlock_vnode(f->filp_vno);
 	put_vnode(f->filp_vno);
  } else if (f->filp_count < 0) {
 	panic("VFS: invalid filp count: %d ino %d/%d", f->filp_count,
 	      vp->v_dev, vp->v_inode_nr);
  } else {
 	/* Other descriptors still share this filp; only unlock the vnode. */
 	unlock_vnode(f->filp_vno);
  }
  mutex_unlock(&f->filp_lock);
 }
--- a/servers/avfs/fproc.h
+++ b/servers/avfs/fproc.h
@ -0,0 +1,72 @@
 #ifndef __VFS_FPROC_H__
 #define __VFS_FPROC_H__
 #include "threads.h"
 #include <sys/select.h>
 #include <minix/safecopies.h>
 /* This is the per-process information.  A slot is reserved for each potential
 * process. Thus NR_PROCS must be the same as in the kernel. It is not
 * possible or even necessary to tell when a slot is free here.
 */
 /* Set to 1 to compile in the extra lock bookkeeping fields at the end of
  * struct fproc (and other code guarded by LOCK_DEBUG).
  */
 #define LOCK_DEBUG 0
 EXTERN struct fproc {
  unsigned fp_flags;		/* FP_* bits, see "fp_flags" below */
  pid_t fp_pid;			/* process id */
  endpoint_t fp_endpoint;	/* kernel endpoint number of this process */
  struct vnode *fp_wd;		/* working directory; NULL during reboot */
  struct vnode *fp_rd;		/* root directory; NULL during reboot */
  struct filp *fp_filp[OPEN_MAX];/* the file descriptor table */
  fd_set fp_filp_inuse;		/* which fd's are in use? */
  fd_set fp_cloexec_set;	/* bit map for POSIX Table 6-2 FD_CLOEXEC */
  dev_t fp_tty;			/* major/minor of controlling tty */
  int fp_block_fd;		/* place to save fd if rd/wr can't finish */
  int fp_block_callnr;		/* blocked call if rd/wr can't finish */
  char *fp_buffer;		/* place to save buffer if rd/wr can't finish*/
  int  fp_nbytes;		/* place to save bytes if rd/wr can't finish */
  int  fp_cum_io_partial;	/* partial byte count if rd/wr can't finish */
  endpoint_t fp_task;		/* which task is proc suspended on */
  int fp_blocked_on;		/* what is it blocked on */
  endpoint_t fp_ioproc;		/* proc no. in suspended-on i/o message */
  cp_grant_id_t fp_grant;	/* revoke this grant on unsuspend if > -1 */
  uid_t fp_realuid;		/* real user id */
  uid_t fp_effuid;		/* effective user id */
  gid_t fp_realgid;		/* real group id */
  gid_t fp_effgid;		/* effective group id */
  int fp_ngroups;		/* number of supplemental groups */
  gid_t fp_sgroups[NGROUPS_MAX];/* supplemental groups */
  mode_t fp_umask;		/* mask set by umask system call */
  message *fp_sendrec;		/* request/reply to/from FS/driver */
  mutex_t fp_lock;		/* mutex to lock fproc object */
  struct job fp_job;		/* pending job */
  thread_t fp_wtid;		/* Thread ID of worker */
 #if LOCK_DEBUG
  /* Debug-only counters, present only when LOCK_DEBUG is non-zero. */
  int fp_vp_rdlocks;		/* number of read-only locks on vnodes */
  int fp_vmnt_rdlocks;		/* number of read-only locks on vmnts */
 #endif
 } fproc[NR_PROCS];
 /* fp_flags: octal bit masks, one bit per flag. */
 #define FP_NOFLAGS	00
 #define FP_SUSP_REOPEN	01	/* Process is suspended until the reopens are
 				 * completed (after the restart of a driver).
 				 */
 #define FP_REVIVED	02	/* Indicates process is being revived */
 #define FP_SESLDR	04	/* Set if process is session leader */
 #define FP_PENDING	010	/* Set if process has pending work */
 #define FP_EXITING	020	/* Set if process is exiting */
 #define FP_PM_PENDING	040	/* Set if process has pending PM request */
 /* Field values. */
 /* NOTE(review): no field in struct fproc visibly uses NOT_REVIVING/REVIVING;
  * confirm whether they are still needed.
  */
 #define NOT_REVIVING       0xC0FFEEE	/* process is not being revived */
 #define REVIVING           0xDEEAD	/* process is being revived from suspension */
 #define PID_FREE	   0	/* process slot free */
 #endif /* __VFS_FPROC_H__ */
--- a/servers/avfs/fs.h
+++ b/servers/avfs/fs.h
@ -0,0 +1,51 @@
 #ifndef __VFS_FS_H__
 #define __VFS_FS_H__
 /* This is the master header for fs.  It includes some other files
 * and defines the principal constants.
 */
 #define _POSIX_SOURCE      1	/* tell headers to include POSIX stuff */
 #define _MINIX             1	/* tell headers to include MINIX stuff */
 #define _SYSTEM            1	/* tell headers that this is the kernel */
 /* Compile-time switch for the SANITYCHECK macro. When non-zero, SANITYCHECK
  * runs check_vrefs() and check_pipe(); if either returns zero it prints the
  * source location together with call_nr and who_e and then panics. When
  * zero, SANITYCHECK expands to nothing.
  */
 #define DO_SANITYCHECKS	   0
 #if DO_SANITYCHECKS
 #define SANITYCHECK do { 			\
 	if(!check_vrefs() || !check_pipe()) {				\
 	   printf("VFS:%s:%d: call_nr %d who_e %d\n", \
 			__FILE__, __LINE__, call_nr, who_e); 	\
 	   panic("sanity check failed");	\
 	}							\
 } while(0)
 #else
 #define SANITYCHECK
 #endif
 /* The following are so basic, all the *.c files get them automatically. */
 #include <minix/config.h>	/* MUST be first */
 #include <minix/ansi.h>		/* MUST be second */
 #include <sys/types.h>
 #include <minix/const.h>
 #include <minix/type.h>
 #include <minix/dmap.h>
 #include <minix/ds.h>
 #include <minix/rs.h>
 #include <limits.h>
 #include <errno.h>
 #include <unistd.h>
 #include <minix/syslib.h>
 #include <minix/sysutil.h>
 #include <minix/timers.h>
 #include "const.h"
 #include "dmap.h"
 #include "proto.h"
 #include "threads.h"
 #include "glo.h"
 #include "comm.h"
 #include "vmnt.h"
 #endif
--- a/servers/avfs/fscall.c
+++ b/servers/avfs/fscall.c
@ -0,0 +1,136 @@
 /* This file handles nested counter-request calls to VFS sent by file system
 * (FS) servers in response to VFS requests.
 *
 * The entry points into this file are
 *   nested_fs_call	perform a nested call from a file system server
 *   nested_dev_call	perform a nested call from a device driver server
 *
 */
 #include "fs.h"
 #include "fproc.h"
 #include <string.h>
 #include <assert.h>
 #include <minix/callnr.h>
 #include <minix/endpoint.h>
 #include <minix/vfsif.h>
 /* maximum nested call stack depth */
 #define MAX_DEPTH 1
 /* global variables stack */
 /* Stack of saved per-call global state, MAX_DEPTH entries deep. In this file
  * push_globals() stores g_fp, g_m_in, g_m_out and g_super_user, and
  * pop_globals() restores fp, m_in and m_out; the remaining fields are not
  * written by the code visible here.
  */
 PRIVATE struct {
  struct fproc *g_fp;			/* pointer to caller process */
  message g_m_in;			/* request message */
  message g_m_out;			/* reply message */
  int g_who_e;				/* endpoint of caller process */
  int g_who_p;				/* slot number of caller process */
  int g_call_nr;			/* call number */
  int g_super_user;			/* is the caller root? */
  char g_user_fullpath[PATH_MAX+1];	/* path to look up */
 } globals[MAX_DEPTH];
 PRIVATE int depth = 0;			/* current globals stack level; index of
 					 * the next free entry in globals[] */
 #if ENABLE_SYSCALL_STATS
 EXTERN unsigned long calls_stats[NCALLS];
 #endif
 FORWARD _PROTOTYPE( int push_globals, (void)				);
 FORWARD _PROTOTYPE( void pop_globals, (void)				);
 FORWARD _PROTOTYPE( void set_globals, (message *m)			);
 /*===========================================================================*
 *				push_globals				     *
 *===========================================================================*/
 PRIVATE int push_globals()
 {
 /* Store the current call's global state (fp, m_in, m_out, super_user) in the
  * next free entry of the globals stack. Returns OK, or EPERM when the stack
  * has already reached MAX_DEPTH.
  */
  if (depth != MAX_DEPTH) {
 	globals[depth].g_fp = fp;
 	globals[depth].g_m_in = m_in;
 	globals[depth].g_m_out = m_out;
 	globals[depth].g_super_user = super_user;
 	/* err_code is not used across blocking calls */
 	depth += 1;
 	return(OK);
  }
  return(EPERM);
 }
 /*===========================================================================*
 *				pop_globals				     *
 *===========================================================================*/
 PRIVATE void pop_globals()
 {
 /* Take the most recently saved entry off the globals stack and reinstate
  * fp, m_in and m_out from it. Popping an empty stack is fatal.
  */
  int top;
  if (depth == 0)
 	panic("Popping from empty globals stack!");
  top = --depth;
  fp = globals[top].g_fp;
  m_in = globals[top].g_m_in;
  m_out = globals[top].g_m_out;
 }
 /*===========================================================================*
 *				set_globals				     *
 *===========================================================================*/
 PRIVATE void set_globals(m)
 message *m;				/* request message */
 {
 /* Prepare the globals for handling request 'm': copy it into m_in and point
  * fp at the fproc slot of the message's source. Nothing else is initialized.
  */
  m_in = *m;
  fp = &fproc[_ENDPOINT_P(m_in.m_source)];
 }
 /*===========================================================================*
 *				nested_fs_call				     *
 *===========================================================================*/
 PUBLIC void nested_fs_call(m)
 message *m;				/* request/reply message pointer */
 {
 /* Service a nested call that a file system server sent to VFS. The current
  * global state is saved, the call is executed, and '*m' is overwritten with
  * the reply message; its m_type carries the result code. Only getsysinfo()
  * is accepted; anything else yields ENOSYS. If the globals cannot be saved,
  * only m_type is set (to the push error).
  */
  int r;
  /* Save global variables of the current call */
  r = push_globals();
  if (r != OK) {
 	printf("VFS: error saving global variables in call %d from FS %d\n",
 		m->m_type, m->m_source);
 	m->m_type = r;
 	return;
  }
  /* Initialize global variables for the nested call */
  set_globals(m);
  /* Perform the nested call - only getsysinfo() is allowed right now */
  if (call_nr != COMMON_GETSYSINFO) {
 	printf("VFS: invalid nested call %d from FS %d\n", call_nr,
 		who_e);
 	r = ENOSYS;
  } else {
 	r = do_getsysinfo();
  }
  /* Store the result, and restore original global variables */
  *m = m_out;
  pop_globals();
  m->m_type = r;
 }
--- a/servers/avfs/gcov.c
+++ b/servers/avfs/gcov.c
@ -0,0 +1,66 @@
 #include "fs.h"
 #include "file.h"
 #include "fproc.h"
 _PROTOTYPE( int gcov_flush, (cp_grant_id_t grantid, size_t size ));
 /*===========================================================================*
 *				do_gcov_flush				*
 *===========================================================================*/
 PUBLIC int do_gcov_flush()
 {
 /* Handle a userland request for the gcov data of another process (possibly
  * VFS itself). The target process is granted write access to the requester's
  * buffer and is then asked to copy its gcov data there; for VFS itself
  * gcov_flush() is called directly. The grant is revoked before returning.
  * Returns EPERM for non-root callers, ESRCH if no process has the requested
  * pid, ENOMEM if the grant fails, and otherwise the result of the flush.
  */
  struct fproc *rfp;
  ssize_t size;
  cp_grant_id_t grantid;
  int r, n;
  pid_t target;
  message m;
  /* If the wrong process is sent to, the system hangs; so make this root-only.
   */
  if (!super_user) return(EPERM);
  size = m_in.GCOV_BUFF_SZ;
  target = m_in.GCOV_PID;
  /* Find target gcov process. */
  for (n = 0; n < NR_PROCS; n++) {
 	if (fproc[n].fp_endpoint == NONE) continue;
 	if (fproc[n].fp_pid == target) break;
  }
  if (n >= NR_PROCS) {
 	printf("VFS: gcov process %d not found\n", target);
 	return(ESRCH);
  }
  rfp = &fproc[n];
  /* Grant target process to requestor's buffer. */
  grantid = cpf_grant_magic(rfp->fp_endpoint, who_e,
 			    (vir_bytes) m_in.GCOV_BUFF_P, size,
 			    CPF_WRITE);
  if (grantid < 0) {
 	printf("VFS: gcov_flush: grant failed\n");
 	return(ENOMEM);
  }
  if (rfp->fp_endpoint != VFS_PROC_NR) {
 	/* Perform generic GCOV request. */
 	m.GCOV_GRANT = grantid;
 	m.GCOV_BUFF_SZ = size;
 	r = _taskcall(rfp->fp_endpoint, COMMON_REQ_GCOV_DATA, &m);
  } else {
 	/* Request is for VFS itself. */
 	r = gcov_flush(grantid, size);
  }
  cpf_revoke(grantid);
  return(r);
 }
--- a/servers/avfs/glo.h
+++ b/servers/avfs/glo.h
@ -0,0 +1,57 @@
 #ifndef __VFS_GLO_H__
 #define __VFS_GLO_H__
 /* EXTERN should be extern except for the table file */
 #ifdef _TABLE
 #undef EXTERN
 #define EXTERN
 #endif
 /* File System global variables */
 EXTERN struct fproc *fp;	/* pointer to caller's fproc struct */
 EXTERN int susp_count;		/* number of procs suspended on pipe */
 EXTERN int nr_locks;		/* number of locks currently in place */
 EXTERN int reviving;		/* number of pipe processes to be revived */
 EXTERN int pending;		/* NOTE(review): undocumented; presumably a
 				 * count of pending work -- confirm */
 EXTERN int sending;		/* NOTE(review): undocumented; presumably a
 				 * count of outstanding sends -- confirm */
 EXTERN dev_t ROOT_DEV;		/* device number of the root device */
 EXTERN int ROOT_FS_E;           /* kernel endpoint of the root FS proc */
 EXTERN u32_t system_hz;		/* system clock frequency. */
 /* The parameters of the call are kept here. */
 EXTERN message m_in;		/* the input message itself */
 EXTERN message m_out;		/* the output message used for reply */
 /* Convenience macros derived from the globals 'fp' and 'm_in'.
  *   who_p	slot number of the caller: index of 'fp' in the fproc table
  *   isokslot(p)	non-zero iff 'p' is a valid index into the fproc table
  *   who_e	endpoint of the caller; falls back to the message source when
  *		'fp' is not a valid slot or has no endpoint (NONE)
  *   call_nr	type field of the request message
  *   super_user	1 if the caller's effective uid is SU_UID, else 0
  */
 # define who_p		((int) (fp - fproc))
 /* The argument is parenthesized so that expressions with low-precedence
  * operators (e.g. a conditional expression) are evaluated as a unit.
  */
 # define isokslot(p)	((p) >= 0 && \
 			 (p) < (int)(sizeof(fproc) / sizeof(struct fproc)))
 #if 0
 # define who_e		(isokslot(who_p) ? fp->fp_endpoint : m_in.m_source)
 #else
 # define who_e		(isokslot(who_p) && fp->fp_endpoint != NONE ? \
 					fp->fp_endpoint : m_in.m_source)
 #endif
 # define call_nr	(m_in.m_type)
 # define super_user	(fp->fp_effuid == SU_UID ? 1 : 0)
 EXTERN struct worker_thread *self;	/* NOTE(review): presumably the worker
 					 * thread currently running; it is
 					 * saved/restored around blocking
 					 * mutex_lock calls -- confirm */
 EXTERN endpoint_t receive_from;/* endpoint with pending reply */
 EXTERN int force_sync;		/* toggle forced synchronous communication */
 EXTERN int verbose;		/* non-zero enables debug printf()s */
 EXTERN int deadlock_resolving;
 EXTERN mutex_t exec_lock;
 EXTERN mutex_t bsf_lock;/* Global lock for access to block special files */
 EXTERN struct worker_thread workers[NR_WTHREADS];
 EXTERN struct worker_thread sys_worker;
 EXTERN struct worker_thread dl_worker;
 EXTERN char mount_label[LABEL_MAX];	/* label of file system to mount */
 /* The following variables are used for returning results to the caller. */
 EXTERN int err_code;		/* temporary storage for error number */
 /* Data initialized elsewhere. */
 extern _PROTOTYPE (int (*call_vec[]), (void) ); /* sys call table */
 extern _PROTOTYPE (int (*pfs_call_vec[]), (void) ); /* pfs callback table */
 extern char dot1[2];   /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */
 extern char dot2[3];   /* meaning to search_dir: no access permission check. */
 #endif
--- a/servers/avfs/job.h
+++ b/servers/avfs/job.h
@ -0,0 +1,11 @@
 #ifndef __VFS_WORK_H__
 #define __VFS_WORK_H__
 /* Description of one unit of work. NOTE(review): field meanings below are
  * inferred from names and types only -- confirm against the code that queues
  * and runs jobs.
  */
 struct job {
  struct fproc *j_fp;		/* presumably the process the job is for */
  message j_m_in;		/* presumably the request message of the job */
  void *(*j_func)(void *arg);	/* function to run; takes and returns void * */
  struct job *j_next;		/* presumably the next job in a linked list */
 };
 #endif
--- a/servers/avfs/link.c
+++ b/servers/avfs/link.c
@ -0,0 +1,455 @@
 /* This file handles the LINK and UNLINK system calls.  It also deals with
 * deallocating the storage used by a file when the last UNLINK is done to a
 * file and the blocks must be returned to the free block pool.
 *
 * The entry points into this file are
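 *   do_link:         perform the LINK system call
 *   do_unlink:	      perform the UNLINK and RMDIR system calls
 *   do_rename:	      perform the RENAME system call
 *   do_truncate:     perform the TRUNCATE system call
 *   do_ftruncate:    perform the FTRUNCATE system call
 *   truncate_vnode:  truncate a regular file or a pipe
 *   do_slink:        perform the symlink(name1, name2) system call
 *   rdlink_direct:   perform a readlink()-like call from within the VFS
 *   do_rdlink:       perform the RDLNK system call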
 */
 #include "fs.h"
 #include <sys/stat.h>
 #include <string.h>
 #include <minix/com.h>
 #include <minix/callnr.h>
 #include <minix/vfsif.h>
 #include <dirent.h>
 #include <assert.h>
 #include "file.h"
 #include "fproc.h"
 #include "path.h"
 #include "vnode.h"
 #include "param.h"
 /*===========================================================================*
 *				do_link					     *
 *===========================================================================*/
 PUBLIC int do_link()
 {
 /* Perform the link(name1, name2) system call: resolve the existing file
 * 'name1' and the parent directory of 'name2', then ask the file server to
 * create the new entry. Returns OK or an error code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp1 = NULL, *vmp2 = NULL;
  struct vnode *vp = NULL, *dirp = NULL;
  int r;
  /* Resolve 'name1', the file to be linked to. */
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp1, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(err_code);
  /* Resolve the final directory of 'name2'; 'fullpath' is reused and ends up
   * holding what last_dir() leaves in it, which is passed on to req_link().
   */
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK ||
      (dirp = last_dir(&resolve, fp)) == NULL) {
 	/* Only the first lookup succeeded; release what it acquired. */
 	r = err_code;
 	unlock_vnode(vp);
 	unlock_vmnt(vmp1);
 	put_vnode(vp);
 	return(r);
  }
  /* Links may not cross file servers, and the target directory must be
   * writable and searchable.
   */
  if (vp->v_fs_e != dirp->v_fs_e)
 	r = EXDEV;
  else if ((r = forbidden(dirp, W_BIT | X_BIT)) == OK)
 	r = req_link(vp->v_fs_e, dirp->v_inode_nr, fullpath,
 		     vp->v_inode_nr);
  /* Release everything; the second lookup may not have locked a vmnt. */
  unlock_vnode(vp);
  unlock_vnode(dirp);
  if (vmp2 != NULL) unlock_vmnt(vmp2);
  unlock_vmnt(vmp1);
  put_vnode(vp);
  put_vnode(dirp);
  return(r);
 }
 /*===========================================================================*
 *				do_unlink				     *
 *===========================================================================*/
 PUBLIC int do_unlink()
 {
 /* Perform the unlink(name) or rmdir(name) system call. Both follow the same
 * path here and differ only in the request that is finally sent to the file
 * server (selected on call_nr). Returns OK or an error code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp, *vmp2;
  struct vnode *dirp, *vp;
  int r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &dirp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  /* Get the last directory in the path. */
  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
 	return(err_code);
  dirp = last_dir(&resolve, fp);
  if (dirp == NULL) return(err_code);
  /* The parent must be a directory in which the caller has both write and
   * search (execute) permission.
   */
  if ((dirp->v_mode & I_TYPE) != I_DIRECTORY)
 	r = ENOTDIR;
  else
 	r = forbidden(dirp, X_BIT | W_BIT);
  /* In a sticky directory only the owner of the file or the superuser may
   * remove the entry.
   */
  if (r == OK && (dirp->v_mode & S_ISVTX) == S_ISVTX) {
 	/* Look up the file itself, only to learn who owns it. */
 	resolve.l_flags = PATH_RET_SYMLINK;
 	resolve.l_vmp = &vmp2;	/* Shouldn't actually get locked */
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode = &vp;
 	resolve.l_vnode_lock = VNODE_READ;
 	vp = advance(dirp, &resolve, fp);
 	assert(vmp2 == NULL);
 	if (vp == NULL) {
 		r = err_code;
 	} else {
 		if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
 			r = EPERM;
 		unlock_vnode(vp);
 		put_vnode(vp);
 	}
  }
  if (r == OK) {
 	/* All checks passed: upgrade the vmnt lock and do the real work. */
 	tll_upgrade(&vmp->m_lock);
 	if (call_nr == UNLINK)
 		r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
 	else
 		r = req_rmdir(dirp->v_fs_e, dirp->v_inode_nr, fullpath);
  }
  unlock_vnode(dirp);
  unlock_vmnt(vmp);
  put_vnode(dirp);
  return(r);
 }
 /*===========================================================================*
 *				do_rename				     *
 *===========================================================================*/
 PUBLIC int do_rename()
 {
 /* Perform the rename(name1, name2) system call: resolve the parent
 * directories of both names, check permissions, and forward the request to
 * the file server. Returns OK or an error code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  char old_name[PATH_MAX+1];
  struct vmnt *oldvmp, *newvmp, *vmp2;
  struct vnode *old_dirp, *new_dirp = NULL, *vp;
  int r = OK, r1;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &oldvmp, &old_dirp);
  /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  /* Resolve the parent directory of 'name1' (the existing file). */
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  old_dirp = last_dir(&resolve, fp);
  if (old_dirp == NULL) return(err_code);
  /* In a sticky directory only the owner of the file or the superuser may
   * rename the entry.
   */
  if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) {
 	/* Look up the file to be renamed, only to learn who owns it. */
 	resolve.l_flags = PATH_RET_SYMLINK;
 	resolve.l_vmp = &vmp2;	/* Shouldn't actually get locked */
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode = &vp;
 	resolve.l_vnode_lock = VNODE_READ;
 	vp = advance(old_dirp, &resolve, fp);
 	assert(vmp2 == NULL);
 	if (vp == NULL) {
 		r = err_code;
 	} else {
 		if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
 			r = EPERM;
 		unlock_vnode(vp);
 		put_vnode(vp);
 	}
  }
  /* The last component of the old name must fit in its save area. */
  if (r == OK && strlen(fullpath) >= sizeof(old_name))
 	r = ENAMETOOLONG;
  if (r == OK) {
 	strcpy(old_name, fullpath);
 	/* Resolve the parent directory of 'name2' (the new name). */
 	lookup_init(&resolve, fullpath, PATH_NOFLAGS, &newvmp, &new_dirp);
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode_lock = VNODE_READ;
 	if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK ||
 	    (new_dirp = last_dir(&resolve, fp)) == NULL)
 		r = err_code;
  }
  if (r != OK) {
 	/* Only the old directory is held at this point. */
 	unlock_vnode(old_dirp);
 	unlock_vmnt(oldvmp);
 	put_vnode(old_dirp);
 	return(r);
  }
  /* Both parent directories must be on the same device. */
  if (old_dirp->v_fs_e != new_dirp->v_fs_e) r = EXDEV;
  /* Parent dirs must be writable, searchable and on a writable device. A
   * permission error takes precedence over EXDEV.
   */
  r1 = forbidden(old_dirp, W_BIT|X_BIT);
  if (r1 == OK) r1 = forbidden(new_dirp, W_BIT|X_BIT);
  if (r1 != OK) r = r1;
  if (r == OK) {
 	tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
 		       new_dirp->v_inode_nr, fullpath);
  }
  unlock_vnode(old_dirp);
  unlock_vnode(new_dirp);
  unlock_vmnt(oldvmp);
  if (newvmp) unlock_vmnt(newvmp);
  put_vnode(old_dirp);
  put_vnode(new_dirp);
  return(r);
 }
 /*===========================================================================*
 *				do_truncate				     *
 *===========================================================================*/
 PUBLIC int do_truncate()
 {
 /* Perform the truncate(name, length) system call. truncate_vnode() does the
 * actual work for both do_truncate() and do_ftruncate(); this function only
 * resolves the path to a vnode, checks write permission, and calls it.
 * Returns OK or an error code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp;
  struct vnode *vp;
  int r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_EXCL;
  resolve.l_vnode_lock = VNODE_WRITE;
  /* A negative length makes no sense. */
  if ((off_t) m_in.flength < 0) return(EINVAL);
  /* Temporarily open file */
  if (fetch_name(m_in.m2_p1, m_in.m2_i1, M1, fullpath) != OK) return(err_code);
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(err_code);
  /* Ask FS to truncate the file, provided the caller may write to it. */
  r = forbidden(vp, W_BIT);
  if (r == OK)
 	r = truncate_vnode(vp, m_in.flength);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_ftruncate				     *
 *===========================================================================*/
 PUBLIC int do_ftruncate()
 {
 /* Perform the ftruncate(fd, length) system call. As with do_truncate(),
 * truncate_vnode() does the actual work. Returns OK or an error code.
 */
  struct filp *rfilp;
  int r;
  if ((off_t) m_in.flength < 0) return(EINVAL);
  /* File is already opened; get a vnode pointer from filp */
  rfilp = get_filp(m_in.m2_i1, VNODE_WRITE);
  if (rfilp == NULL) return(err_code);
  /* The descriptor must have been opened for writing. */
  if (rfilp->filp_mode & W_BIT)
 	r = truncate_vnode(rfilp->filp_vno, m_in.flength);
  else
 	r = EBADF;
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				truncate_vnode				     *
 *===========================================================================*/
 PUBLIC int truncate_vnode(vp, newsize)
 struct vnode *vp;
 off_t newsize;
 {
 /* Truncate a regular file or a pipe to 'newsize' bytes. The caller must hold
 * the lock on 'vp'. Returns EINVAL for any other file type, otherwise the
 * result of the request to the file server; the cached size is updated only
 * when that request succeeds.
 */
  int r;
  assert(tll_locked_by_me(&vp->v_lock));
  switch (vp->v_mode & I_TYPE) {
    case I_REGULAR:
    case I_NAMED_PIPE:
 	break;
    default:
 	return(EINVAL);
  }
  r = req_ftrunc(vp->v_fs_e, vp->v_inode_nr, newsize, 0);
  if (r == OK)
 	vp->v_size = newsize;
  return(r);
 }
 /*===========================================================================*
 *                             do_slink					     *
 *===========================================================================*/
 PUBLIC int do_slink()
 {
 /* Perform the symlink(name1, name2) system call: 'name1' is the link
 * contents, 'name2' the path of the link to create. Returns OK or an error
 * code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp;
  struct vnode *vp;
  int r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  /* Reject a link target whose length is at most 1 or too long. */
  if (m_in.name1_length <= 1) return(ENOENT);
  if (m_in.name1_length >= SYMLINK_MAX) return(ENAMETOOLONG);
  /* Get dir inode of 'name2' */
  if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
 	return(err_code);
  vp = last_dir(&resolve, fp);
  if (vp == NULL) return(err_code);
  /* The directory must be writable and searchable by the caller. */
  r = forbidden(vp, W_BIT|X_BIT);
  if (r == OK) {
 	r = req_slink(vp->v_fs_e, vp->v_inode_nr, fullpath, who_e,
 		      m_in.name1, m_in.name1_length - 1, fp->fp_effuid,
 		      fp->fp_effgid);
  }
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *                              rdlink_direct                                *
 *===========================================================================*/
 PUBLIC int rdlink_direct(orig_path, link_path, rfp)
 char *orig_path;
 char *link_path; /* should have length PATH_MAX+1 */
 struct fproc *rfp;
 {
 /* Perform a readlink()-like call from within the VFS on behalf of 'rfp'.
 * 'orig_path' names the symbolic link; on success its contents are stored in
 * 'link_path' as a NUL-terminated string. Returns the value of req_rdlink()
 * (positive on success), EINVAL if the path is not a symbolic link, or
 * err_code if the path cannot be resolved.
 */
  int r;
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  /* Temporarily open the file containing the symbolic link. strncpy() does
   * not terminate the copy when the source has PATH_MAX or more characters,
   * so terminate it explicitly.
   */
  strncpy(fullpath, orig_path, PATH_MAX);
  fullpath[PATH_MAX] = '\0';
  if ((vp = eat_path(&resolve, rfp)) == NULL) return(err_code);
  /* Make sure this is a symbolic link */
  if ((vp->v_mode & I_TYPE) != I_SYMBOLIC_LINK)
 	r = EINVAL;
  else
 	/* Ask for at most PATH_MAX bytes so that the terminator written
 	 * below always fits in the PATH_MAX+1 byte buffer (assumes
 	 * req_rdlink() never returns more than the size requested).
 	 */
 	r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, (endpoint_t) 0,
 						link_path, PATH_MAX, 1);
  if (r > 0) link_path[r] = '\0';	/* Terminate string when successful */
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return r;
 }
 /*===========================================================================*
 *                             do_rdlink                                    *
 *===========================================================================*/
 PUBLIC int do_rdlink()
 {
 /* Perform the readlink(name, buf, bufsize) system call. The link contents
 * are copied to the caller's buffer by the file server. Returns the result
 * of req_rdlink() or an error code.
 */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp;
  struct vnode *vp;
  int r, copylen;
  lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  /* A negative buffer size is invalid. */
  copylen = m_in.nbytes;
  if (copylen < 0) return(EINVAL);
  /* Temporarily open the file containing the symbolic link */
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(err_code);
  /* Only a symbolic link can be read this way. */
  if ((vp->v_mode & I_TYPE) == I_SYMBOLIC_LINK)
 	r = req_rdlink(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2,
 		       copylen, 0);
  else
 	r = EINVAL;
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
--- a/servers/avfs/lock.c
+++ b/servers/avfs/lock.c
@ -0,0 +1,191 @@
 /* This file handles advisory file locking as required by POSIX.
 *
 * The entry points into this file are
 *   lock_op:	perform locking operations for FCNTL system call
 *   lock_revive: revive processes when a lock is released
 */
 #include "fs.h"
 #include <minix/com.h>
 #include <minix/u64.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include "file.h"
 #include "fproc.h"
 #include "lock.h"
 #include "vnode.h"
 #include "param.h"
 /*===========================================================================*
 *				lock_op					     *
 *===========================================================================*/
 PUBLIC int lock_op(f, req)
 struct filp *f;
 int req;			/* F_GETLK, F_SETLK or F_SETLKW */
 {
 /* Perform the advisory locking required by POSIX on the file behind 'f'.
 * The flock structure is fetched from the caller's address space (its
 * address is in m_in.name1). Depending on 'req' and the lock type this
 * reports a conflicting lock (F_GETLK), sets a read or write lock, or
 * releases a region (F_UNLCK). Returns OK, SUSPEND when the caller was
 * suspended waiting for a lock (F_SETLKW), or an error code.
 */
  int r, ltype, i, conflict = 0, unlocking = 0;
  mode_t mo;
  off_t first, last;
  struct flock flock;
  vir_bytes user_flock;
  struct file_lock *flp, *flp2, *empty;
  /* Fetch the flock structure from user space. */
  user_flock = (vir_bytes) m_in.name1;
  r = sys_datacopy(who_e, (vir_bytes) user_flock, VFS_PROC_NR,
 		   (vir_bytes) &flock, (phys_bytes) sizeof(flock));
  if (r != OK) return(EINVAL);
  /* Make some error checks. */
  ltype = flock.l_type;
  mo = f->filp_mode;
  if (ltype != F_UNLCK && ltype != F_RDLCK && ltype != F_WRLCK) return(EINVAL);
  if (req == F_GETLK && ltype == F_UNLCK) return(EINVAL);
  if ( (f->filp_vno->v_mode & I_TYPE) != I_REGULAR) return(EINVAL);
  if (req != F_GETLK && ltype == F_RDLCK && (mo & R_BIT) == 0) return(EBADF);
  if (req != F_GETLK && ltype == F_WRLCK && (mo & W_BIT) == 0) return(EBADF);
  /* Compute the first and last bytes in the lock region. */
  switch (flock.l_whence) {
    case SEEK_SET:	first = 0; break;
    case SEEK_CUR:
 	if (ex64hi(f->filp_pos) != 0)
 		panic("lock_op: position in file too high");
 	first = ex64lo(f->filp_pos);
 	break;
    case SEEK_END:	first = f->filp_vno->v_size; break;
    default:	return(EINVAL);
  }
  /* Check for overflow. */
  if (((long) flock.l_start > 0) && ((first + flock.l_start) < first))
 	return(EINVAL);
  if (((long) flock.l_start < 0) && ((first + flock.l_start) > first))
 	return(EINVAL);
  first = first + flock.l_start;
  last = first + flock.l_len - 1;
  if (flock.l_len == 0) last = MAX_FILE_POS;	/* 0 means "to end of file" */
  if (last < first) return(EINVAL);
  /* Check if this region conflicts with any existing lock. */
  empty = NULL;
  for (flp = &file_lock[0]; flp < &file_lock[NR_LOCKS]; flp++) {
 	if (flp->lock_type == 0) {
 		if (empty == NULL) empty = flp;	/* remember first free slot */
 		continue;	/* 0 means unused slot */
 	}
 	if (flp->lock_vnode != f->filp_vno) continue;	/* different file */
 	if (last < flp->lock_first) continue;	/* new one is in front */
 	if (first > flp->lock_last) continue;	/* new one is afterwards */
 	if (ltype == F_RDLCK && flp->lock_type == F_RDLCK) continue;
 	if (ltype != F_UNLCK && flp->lock_pid == fp->fp_pid) continue;
 	/* A process may release only its own locks; an unlock request must
 	 * leave the locks held by other processes untouched.
 	 */
 	if (ltype == F_UNLCK && flp->lock_pid != fp->fp_pid) continue;
 	/* There might be a conflict.  Process it. */
 	conflict = 1;
 	if (req == F_GETLK) break;	/* flp now points at the conflict */
 	/* If we are trying to set a lock, it just failed. */
 	if (ltype == F_RDLCK || ltype == F_WRLCK) {
 		if (req == F_SETLK) {
 			/* For F_SETLK, just report back failure. */
 			return(EAGAIN);
 		} else {
 			/* For F_SETLKW, suspend the process. */
 			suspend(FP_BLOCKED_ON_LOCK);
 			return(SUSPEND);
 		}
 	}
 	/* We are clearing a lock and we found something that overlaps. */
 	unlocking = 1;
 	if (first <= flp->lock_first && last >= flp->lock_last) {
 		flp->lock_type = 0;	/* mark slot as unused */
 		nr_locks--;		/* number of locks is now 1 less */
 		continue;
 	}
 	/* Part of a locked region has been unlocked. */
 	if (first <= flp->lock_first) {
 		flp->lock_first = last + 1;
 		continue;
 	}
 	if (last >= flp->lock_last) {
 		flp->lock_last = first - 1;
 		continue;
 	}
 	/* Bad luck. A lock has been split in two by unlocking the middle.
 	 * A free slot is needed for the second half.
 	 */
 	if (nr_locks == NR_LOCKS) return(ENOLCK);
 	for (i = 0; i < NR_LOCKS; i++)
 		if (file_lock[i].lock_type == 0) break;
 	flp2 = &file_lock[i];
 	flp2->lock_type = flp->lock_type;
 	flp2->lock_pid = flp->lock_pid;
 	flp2->lock_vnode = flp->lock_vnode;
 	flp2->lock_first = last + 1;
 	flp2->lock_last = flp->lock_last;
 	flp->lock_last = first - 1;
 	nr_locks++;
  }
  if (unlocking) lock_revive();
  if (req == F_GETLK) {
 	if (conflict) {
 		/* GETLK and conflict. Report on the conflicting lock. */
 		flock.l_type = flp->lock_type;
 		flock.l_whence = SEEK_SET;
 		flock.l_start = flp->lock_first;
 		flock.l_len = flp->lock_last - flp->lock_first + 1;
 		flock.l_pid = flp->lock_pid;
 	} else {
 		/* It is GETLK and there is no conflict. */
 		flock.l_type = F_UNLCK;
 	}
 	/* Copy the flock structure back to the caller. */
 	r = sys_datacopy(VFS_PROC_NR, (vir_bytes) &flock,
 		who_e, (vir_bytes) user_flock, (phys_bytes) sizeof(flock));
 	return(r);
  }
  if (ltype == F_UNLCK) return(OK);	/* unlock request has been handled */
  /* There is no conflict.  If space exists, store new lock in the table. */
  if (empty == NULL) return(ENOLCK);	/* table full */
  empty->lock_type = ltype;
  empty->lock_pid = fp->fp_pid;
  empty->lock_vnode = f->filp_vno;
  empty->lock_first = first;
  empty->lock_last = last;
  nr_locks++;
  return(OK);
 }
 /*===========================================================================*
 *				lock_revive				     *
 *===========================================================================*/
 PUBLIC void lock_revive()
 {
 /* Revive every process that is blocked waiting for a file lock. No attempt
 * is made to work out which of them can actually proceed: those that still
 * conflict will simply block again when they run. Working that out here
 * would cost extra code for a gain that only shows up in the rare case that
 * locking is really being used.
 */
  int slot;
  for (slot = 0; slot < NR_PROCS; slot++) {
 	struct fproc *rfp = &fproc[slot];
 	if (rfp->fp_pid != PID_FREE &&
 	    rfp->fp_blocked_on == FP_BLOCKED_ON_LOCK)
 		revive(rfp->fp_endpoint, 0);
  }
 }
--- a/servers/avfs/lock.h
+++ b/servers/avfs/lock.h
@ -0,0 +1,15 @@
 #ifndef __VFS_LOCK_H__
 #define __VFS_LOCK_H__
 /* This is the file locking table.  Each used entry describes one advisory
 * lock: the vnode it applies to, the process holding it, and the byte range
 * it covers.
 */
 EXTERN struct file_lock {
  short lock_type;		/* F_RDLCK or F_WRLCK; 0 means unused slot */
  pid_t lock_pid;		/* pid of the process holding the lock */
  struct vnode *lock_vnode;	/* vnode of the file the lock applies to */
  off_t lock_first;		/* offset of first byte locked */
  off_t lock_last;		/* offset of last byte locked */
 } file_lock[NR_LOCKS];
 #endif
--- a/servers/avfs/main.c
+++ b/servers/avfs/main.c
@ -0,0 +1,967 @@
 /*
 * This file contains the main program of the Virtual File System.  It consists
 * of a loop that gets messages requesting work, carries out the work, and sends
 * replies.
 *
 * The entry points into this file are:
 *   main:	main program of the Virtual File System
 *   reply:	send a reply to a process after the requested work is done
 *
 */
 #include "fs.h"
 #include <fcntl.h>
 #include <string.h>
 #include <stdio.h>
 #include <signal.h>
 #include <assert.h>
 #include <stdlib.h>
 #include <sys/ioc_memory.h>
 #include <sys/svrctl.h>
 #include <sys/select.h>
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include <minix/keymap.h>
 #include <minix/const.h>
 #include <minix/endpoint.h>
 #include <minix/safecopies.h>
 #include <minix/debug.h>
 #include <minix/vfsif.h>
 #include "file.h"
 #include "dmap.h"
 #include "fproc.h"
 #include "vmnt.h"
 #include "vnode.h"
 #include "job.h"
 #include "param.h"
 #if ENABLE_SYSCALL_STATS
 EXTERN unsigned long calls_stats[NCALLS];
 #endif
 /* Thread related prototypes */
 FORWARD _PROTOTYPE( void thread_cleanup_f, (struct fproc *rfp, char *f,
 					    int l)			);
 #define thread_cleanup(x) thread_cleanup_f(x, __FILE__, __LINE__)
 FORWARD _PROTOTYPE( void *do_async_dev_result, (void *arg)		);
 FORWARD _PROTOTYPE( void *do_control_msgs, (void *arg)			);
 FORWARD _PROTOTYPE( void *do_fs_reply, (struct job *job)			);
 FORWARD _PROTOTYPE( void *do_work, (void *arg)				);
 FORWARD _PROTOTYPE( void *do_pm, (void *arg)				);
 FORWARD _PROTOTYPE( void *do_init_root, (void *arg)			);
 FORWARD _PROTOTYPE( void handle_work, (void *(*func)(void *arg))		);
 FORWARD _PROTOTYPE( void get_work, (void)				);
 FORWARD _PROTOTYPE( void lock_pm, (void)				);
 FORWARD _PROTOTYPE( void unlock_pm, (void)				);
 FORWARD _PROTOTYPE( void service_pm, (void)				);
 FORWARD _PROTOTYPE( void service_pm_postponed, (void)				);
 FORWARD _PROTOTYPE( int unblock, (struct fproc *rfp)			);
 /* SEF functions and variables. */
 FORWARD _PROTOTYPE( void sef_local_startup, (void) );
 FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
 PRIVATE mutex_t pm_lock;
 /*===========================================================================*
 *				main					     *
 *===========================================================================*/
 PUBLIC int main(void)
 {
 /* Main program of the file system. Each pass of the endless loop gets a
 * message, classifies it (reply from a file server, call from PM,
 * notification, stray task message, device result, or ordinary system call)
 * and hands it to the matching handler or thread.
 */
  struct job *job;
  int transid, req;
  /* SEF local startup. */
  sef_local_startup();
  printf("Started AVFS\n");
  verbose = 0;
  /* This is the main loop that gets work, processes it, and sends replies. */
  for (;;) {
 	yield_all();	/* let other threads run */
 	send_work();
 	get_work();
 	transid = TRNS_GET_ID(m_in.m_type);
 	req = TRNS_DEL_ID(m_in.m_type);
 	job = worker_getjob( (thread_t) transid - VFS_TRANSID);
 	/* Transaction encoding changes original m_type value; restore. */
 	if (job != NULL) {
 		/* The message answers a request sent by one of our threads. */
 		m_in.m_type = req;
 		do_fs_reply(job);
 		continue;
 	}
 	m_in.m_type = transid;
 	if (who_e == PM_PROC_NR) { /* Calls from PM */
 		/* Special control messages from PM */
 		sys_worker_start(do_pm);
 		continue;
 	}
 	if (is_notify(call_nr)) {
 		/* A task notify()ed us */
 		sys_worker_start(do_control_msgs);
 		continue;
 	}
 	if (who_p < 0) { /* i.e., message comes from a task */
 		/* We're going to ignore this message. Tasks should
 		 * send notify()s only.
 		 */
 		printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
 		continue;
 	}
 	/* At this point we either have results from an asynchronous device
 	 * or a new system call. In both cases a new worker thread has to be
 	 * started and there might not be one available from the pool. This is
 	 * not a problem (requests/replies are simply queued), except when
 	 * they're from an FS endpoint, because these can cause a deadlock.
 	 * handle_work() takes care of the details. */
 	if (IS_DEV_RS(call_nr))
 		handle_work(do_async_dev_result); /* device request result */
 	else
 		handle_work(do_work);		  /* normal syscall */
  }
  return(OK);				/* shouldn't come here */
 }
 /*===========================================================================*
 *			       handle_work				     *
 *===========================================================================*/
 PRIVATE void handle_work(void *(*func)(void *arg))
 {
 /* Start 'func' on a thread to handle an asynchronous device reply or a new
 * system call. When the sender is an FS endpoint and no worker is available,
 * the work goes to the deadlock-resolving thread instead; if that one is
 * already in use the sender is told to try again (EAGAIN).
 */
  struct vmnt *vmp;
  vmp = find_vmnt(who_e);
  if (vmp == NULL || worker_available() != 0) {
 	/* Not from an FS endpoint, or a worker is free: the normal path. */
 	worker_start(func);
 	return;
  }
  /* A back call or dev result from an FS endpoint, and no worker threads
   * available to handle it.
   */
  if (deadlock_resolving) {
 	/* Already trying to resolve a deadlock, can't handle more, sorry */
 	reply(who_e, EAGAIN);
 	return;
  }
  deadlock_resolving = 1;
  vmp->m_flags |= VMNT_BACKCALL;
  dl_worker_start(func);
 }
 /*===========================================================================*
 *			       do_async_dev_result				     *
 *===========================================================================*/
 PRIVATE void *do_async_dev_result(void *arg)
 {
 /* Thread routine: process a result message from an asynchronous character
 * driver. 'arg' points to the job describing the message. Always returns
 * NULL.
 */
  struct job my_job;
  endpoint_t endpt;
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  /* Dispatch on the kind of result the driver has for us. */
  switch (call_nr) {
    case DEV_REVIVE:
 	endpt = m_in.REP_ENDPT;
 	if (endpt == VFS_PROC_NR)
 		endpt = find_suspended_ep(m_in.m_source, m_in.REP_IO_GRANT);
 	if (endpt == NONE) {
 		printf("VFS: proc with grant %d from %d not found\n",
 			m_in.REP_IO_GRANT, m_in.m_source);
 	} else if (m_in.REP_STATUS == SUSPEND) {
 		printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n");
 	} else {
 		revive(endpt, m_in.REP_STATUS);
 	}
 	break;
    case DEV_OPEN_REPL:
 	open_reply();
 	break;
    case DEV_REOPEN_REPL:
 	reopen_reply();
 	break;
    case DEV_CLOSE_REPL:
 	close_reply();
 	break;
    case DEV_SEL_REPL1:
 	select_reply1(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS);
 	break;
    case DEV_SEL_REPL2:
 	select_reply2(m_in.m_source, m_in.DEV_MINOR, m_in.DEV_SEL_OPS);
 	break;
    default:
 	break;		/* any other type is silently ignored */
  }
  /* Undo the bookkeeping done by handle_work() for a back call. */
  if (deadlock_resolving) {
 	struct vmnt *vmp = find_vmnt(who_e);
 	if (vmp != NULL)
 		vmp->m_flags &= ~VMNT_BACKCALL;
 	if (fp != NULL && fp->fp_wtid == dl_worker.w_tid)
 		deadlock_resolving = 0;
  }
  thread_cleanup(NULL);
  return(NULL);
 }
 /*===========================================================================*
 *			       do_control_msgs				     *
 *===========================================================================*/
 PRIVATE void *do_control_msgs(void *arg)
 {
 /* Thread routine: handle a notification. 'arg' points to the job describing
 * the message. The sender decides what is done: the clock means a timer
 * expired, DS reports an event, anything else is treated as a device status
 * change. Always returns NULL.
 */
  struct job my_job;
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  /* Check for special control messages. */
  if (who_e == CLOCK)
 	/* Alarm timer expired. Used only for select(). Check it. */
 	expire_timers(m_in.NOTIFY_TIMESTAMP);
  else if (who_e == DS_PROC_NR)
 	/* DS notifies us of an event. */
 	ds_event();
  else
 	/* Device notifies us of an event. */
 	dev_status(&m_in);
  thread_cleanup(NULL);
  return(NULL);
 }
 /*===========================================================================*
 *			       do_fs_reply				     *
 *===========================================================================*/
 PRIVATE void *do_fs_reply(struct job *job)
 {
 /* Deliver the reply in m_in to the thread that is waiting for it: the
 * message is copied to the waiting process' fp_sendrec buffer and its worker
 * thread is signalled. Panics when the sender is not a mounted file server;
 * a reply nobody is waiting for is reported and dropped. Always returns NULL.
 */
  struct fproc *rfp;
  struct vmnt *vmp;
  if (verbose) printf("VFS: reply to request!\n");
  vmp = find_vmnt(who_e);
  if (vmp == NULL)
 	panic("Couldn't find vmnt for endpoint %d", who_e);
  rfp = job->j_fp;
  if (rfp == NULL || rfp->fp_endpoint == NONE) {
 	printf("VFS: spurious reply from %d\n", who_e);
 	return(NULL);
  }
  *rfp->fp_sendrec = m_in;
  vmp->m_comm.c_cur_reqs--;	/* We've got our reply, make room for others */
  /* Continue the worker thread that was waiting for this reply. */
  worker_signal(worker_get(rfp->fp_wtid));
  return(NULL);
 }
 /*===========================================================================*
 *				lock_pm					     *
 *===========================================================================*/
 PRIVATE void lock_pm(void)
 {
 /* Acquire pm_lock. If the lock cannot be taken immediately, the globals
 * m_in, fp and self are saved before waiting for the lock and restored once
 * it has been obtained.
 */
  if (mutex_trylock(&pm_lock) != 0) {
 	/* Not free right now: save our context and wait for it. */
 	message saved_m_in;
 	struct fproc *saved_fp;
 	struct worker_thread *saved_self;
 	saved_m_in = m_in;
 	saved_fp = fp;
 	saved_self = self;
 	if (mutex_lock(&pm_lock) != 0)
 		panic("Could not obtain lock on pm\n");
 	m_in = saved_m_in;
 	fp = saved_fp;
 	self = saved_self;
  }
 }
 /*===========================================================================*
 *				unlock_pm				     *
 *===========================================================================*/
 PRIVATE void unlock_pm(void)
 {
 /* Release pm_lock; failing to do so is fatal. */
  int r;
  r = mutex_unlock(&pm_lock);
  if (r != 0)
 	panic("Could not release lock on pm");
 }
 /*===========================================================================*
 *			       do_pm					     *
 *===========================================================================*/
 PRIVATE void *do_pm(void *arg)
 {
  struct job my_job;
  struct fproc *rfp;
  my_job = *((struct job *) arg);
  rfp = fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  lock_pm();
  service_pm();
  unlock_pm();
  thread_cleanup(NULL);
  return(NULL);
 }
 /*===========================================================================*
 *			       do_pending_pipe					     *
 *===========================================================================*/
 PRIVATE void *do_pending_pipe(void *arg)
 {
 /* Thread routine: carry out a pipe read or write for a process. 'arg'
 * points to the job describing the request; the file descriptor, buffer and
 * byte count are taken from the process' fproc entry. A reply is sent unless
 * the operation suspends. Always returns NULL.
 */
  struct job my_job;
  struct filp *f;
  tll_access_t locktype;
  int r, fd_nr, rw_flag;
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  lock_proc(fp, 1 /* force lock */);
  fd_nr = fp->fp_block_fd;
  /* Reading needs only a read lock on the vnode, writing a write lock. */
  if (call_nr == READ)
 	locktype = VNODE_READ;
  else
 	locktype = VNODE_WRITE;
  f = get_filp(fd_nr, locktype);
  assert(f != NULL);
  rw_flag = (call_nr == READ) ? READING : WRITING;
  r = rw_pipe(rw_flag, who_e, fd_nr, f, fp->fp_buffer, fp->fp_nbytes);
  /* Do we have results to report? */
  if (r != SUSPEND)
 	reply(who_e, r);
  unlock_filp(f);
  thread_cleanup(fp);
  return(NULL);
 }
 /*===========================================================================*
 *			       do_dummy					     *
 *===========================================================================*/
 PUBLIC void *do_dummy(void *arg)
 {
 /* Thread routine that does no work of its own: it tries to take the lock of
 * the job's process and then cleans up, passing the process to
 * thread_cleanup() only when the lock was obtained. Always returns NULL.
 */
  struct job my_job;
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  if (mutex_trylock(&fp->fp_lock) != 0) {
 	/* Proc is busy, let that worker thread carry out the work */
 	thread_cleanup(NULL);
 	return(NULL);
  }
  thread_cleanup(fp);
  return(NULL);
 }
 /*===========================================================================*
 *			       do_work					     *
 *===========================================================================*/
 PRIVATE void *do_work(void *arg)
 {
 /* Thread routine: carry out one system call. 'arg' points to the job
 * describing the request. The call is dispatched to its handler and, unless
 * the result is SUSPEND, a reply is sent to the caller. Always returns NULL.
 */
  struct job my_job;
  int error;
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  m_in = my_job.j_m_in;
  lock_proc(fp, 0); /* This proc is busy */
  if (call_nr == MAPDRIVER) {
 	error = do_mapdriver();
  } else if (call_nr == COMMON_GETSYSINFO) {
 	error = do_getsysinfo();
  } else if (IS_PFS_VFS_RQ(call_nr)) {
 	/* Nested call into VFS; accepted from PFS only. */
 	if (who_e != PFS_PROC_NR) {
 		printf("VFS: only PFS is allowed to make nested VFS calls\n");
 		error = ENOSYS;
 	} else if (call_nr > PFS_BASE && call_nr < PFS_BASE + PFS_NREQS) {
 		call_nr -= PFS_BASE;
 		error = (*pfs_call_vec[call_nr])();
 	} else {
 		error = ENOSYS;
 	}
  } else if (call_nr < 0 || call_nr >= NCALLS) {
 	/* Not a valid POSIX system call number. */
 	error = ENOSYS;
  } else if ((fp->fp_flags & FP_EXITING) || fp->fp_pid == PID_FREE) {
 	/* Process is exiting, or vanished before we were able to handle
 	 * the request. Replying has no use. Just drop it. */
 	error = SUSPEND;
  } else {
 	/* We're dealing with a POSIX system call from a normal
 	 * process. Call the internal function that does the work.
 	 */
 #if ENABLE_SYSCALL_STATS
 	calls_stats[call_nr]++;
 #endif
 	error = (*call_vec[call_nr])();
  }
  /* Copy the results back to the user and send reply. */
  if (error != SUSPEND) {
 	if (deadlock_resolving) {
 		/* Undo the bookkeeping done by handle_work(). */
 		struct vmnt *vmp = find_vmnt(who_e);
 		if (vmp != NULL)
 			vmp->m_flags &= ~VMNT_BACKCALL;
 		if (fp->fp_wtid == dl_worker.w_tid)
 			deadlock_resolving = 0;
 	}
 	reply(who_e, error );
  }
  thread_cleanup(fp);
  return(NULL);
 }
 /*===========================================================================*
 *			       sef_local_startup			     *
 *===========================================================================*/
 PRIVATE void sef_local_startup()
 {
 /* Register the VFS init callbacks with SEF and let SEF perform startup. */
  /* A restart is routed to sef_cb_init_fail; a fresh start runs
   * sef_cb_init_fresh. No live update callback is registered for now.
   */
  sef_setcb_init_restart(sef_cb_init_fail);
  sef_setcb_init_fresh(sef_cb_init_fresh);
  sef_startup();
 }
 /*===========================================================================*
 *				sef_cb_init_fresh			     *
 *===========================================================================*/
 PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
 {
 /* Initialize the virtual file server. Fills the process table from the
  * PM_INIT messages sent by PM, maps the services of the boot image,
  * subscribes to driver events, sets up worker threads and locks, and finally
  * starts a job that mounts the root file system. Returns OK; the failures
  * that are checked for here end in a panic.
  */
  int s, i;
  struct fproc *rfp;
  message mess;
  struct rprocpub rprocpub[NR_BOOT_PROCS];
  force_sync = 0;
  /* Initialize proc endpoints to NONE */
  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 	rfp->fp_endpoint = NONE;
 	rfp->fp_pid = PID_FREE;
  }
  /* Initialize the process table with help of the process manager messages.
   * Expect one message for each system process with its slot number and pid.
   * When no more processes follow, the magic process number NONE is sent.
   * Then, stop and synchronize with the PM.
   */
  do {
 	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
 		panic("VFS: couldn't receive from PM: %d", s);
 	if (mess.m_type != PM_INIT)
 		panic("unexpected message from PM: %d", mess.m_type);
 	if (NONE == mess.PM_PROC) break;
 	rfp = &fproc[mess.PM_SLOT];
 	rfp->fp_flags = FP_NOFLAGS;
 	rfp->fp_pid = mess.PM_PID;
 	rfp->fp_endpoint = mess.PM_PROC;
 	rfp->fp_grant = GRANT_INVALID;
 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
 	rfp->fp_realuid = (uid_t) SYS_UID;
 	rfp->fp_effuid = (uid_t) SYS_UID;
 	rfp->fp_realgid = (gid_t) SYS_GID;
 	rfp->fp_effgid = (gid_t) SYS_GID;
 	rfp->fp_umask = ~0;
  } while (TRUE);			/* continue until process NONE */
  mess.m_type = OK;			/* tell PM that we succeeded */
  s = send(PM_PROC_NR, &mess);		/* send synchronization message */
  /* All process table entries have been set. Continue with initialization. */
  fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
 					 * FSes is on behalf of myself */
  init_dmap();			/* Initialize device table. */
  system_hz = sys_hz();
  /* Map all the services in the boot image. */
  if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
 			    (vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK){
 	panic("sys_safecopyfrom failed: %d", s);
  }
  for (i = 0; i < NR_BOOT_PROCS; i++) {
 	if (rprocpub[i].in_use) {
 		if ((s = map_service(&rprocpub[i])) != OK) {
 			panic("VFS: unable to map service: %d", s);
 		}
 	}
  }
  /* Subscribe to driver events for VFS drivers. The assignment is
   * parenthesized on its own so that 's' holds the ds_subscribe() result
   * (and not the outcome of the comparison) when it is reported below.
   */
  if ((s = ds_subscribe("drv\\.vfs\\..*", DSF_INITIAL | DSF_OVERWRITE)) != OK){
 	panic("VFS: can't subscribe to driver events (%d)", s);
  }
 #if DO_SANITYCHECKS
  FIXME("VFS: DO_SANITYCHECKS is on");
 #endif
  /* Initialize worker threads */
  for (i = 0; i < NR_WTHREADS; i++)  {
 	worker_init(&workers[i]);
  }
  worker_init(&sys_worker); /* exclusive system worker thread */
  worker_init(&dl_worker); /* exclusive worker thread to resolve deadlocks */
  /* Initialize global locks */
  if (mthread_mutex_init(&pm_lock, NULL) != 0)
 	panic("VFS: couldn't initialize pm lock mutex");
  if (mthread_mutex_init(&exec_lock, NULL) != 0)
 	panic("VFS: couldn't initialize exec lock");
  if (mthread_mutex_init(&bsf_lock, NULL) != 0)
 	panic("VFS: couldn't initialize block special file lock");
  /* Initialize event resources for boot procs and locks for all procs. The
   * initialization is done outside assert() so that it is still carried out
   * when assertions are compiled out.
   */
  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 	if (mutex_init(&rfp->fp_lock, NULL) != 0)
 		panic("VFS: couldn't initialize proc lock");
 #if LOCK_DEBUG
 	rfp->fp_vp_rdlocks = 0;
 	rfp->fp_vmnt_rdlocks = 0;
 #endif
  }
  init_vnodes();		/* init vnodes */
  init_vmnts();			/* init vmnt structures */
  init_select();		/* init select() structures */
  init_filps();			/* Init filp structures */
  mount_pfs();			/* mount Pipe File Server */
  worker_start(do_init_root);	/* mount initial ramdisk as file system root */
  return(OK);
 }
 /*===========================================================================*
 *			       do_init_root				     *
 *===========================================================================*/
 PRIVATE void *do_init_root(void *arg)
 {
  struct fproc *rfp;
  struct job my_job;
  int r;
  char *mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
  my_job = *((struct job *) arg);
  fp = my_job.j_fp;
  lock_proc(fp, 1 /* force lock */); /* This proc is busy */
  lock_pm();
  /* Initialize process directories. mount_fs will set them to the correct
   * values */
  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 	FD_ZERO(&(rfp->fp_filp_inuse));
 	rfp->fp_rd = NULL;
 	rfp->fp_wd = NULL;
  }
  if ((r = mount_fs(DEV_IMGRD, "/", MFS_PROC_NR, 0, mount_label)) != OK)
 	panic("Failed to initialize root");
  unlock_pm();
  thread_cleanup(fp);
  return(NULL);
 }
 /*===========================================================================*
 *				lock_proc				     *
 *===========================================================================*/
 PUBLIC void lock_proc(struct fproc *rfp, int force_lock)
 {
 /* Acquire the per-process lock of 'rfp'.
  *
  * rfp:        process whose fp_lock is to be taken
  * force_lock: when nonzero the lock must be available right away; failing
  *             to get it is fatal. When zero and the lock is held by someone
  *             else, wait for it with mutex_lock().
  *
  * The globals m_in, fp and self are saved before waiting and restored
  * afterwards. All lock calls are made outside assert() so that locking still
  * happens when assertions are compiled out, and failures panic in the same
  * way unlock_proc() does.
  */
  int r;
  message org_m_in;
  struct fproc *org_fp;
  struct worker_thread *org_self;
  r = mutex_trylock(&rfp->fp_lock);
  /* Were we supposed to obtain this lock immediately? */
  if (force_lock) {
 	if (r != 0)
 		panic("Failed to lock: %d", r);
 	return;
  }
  if (r == 0) return;
  org_m_in = m_in;
  org_fp = fp;
  org_self = self;
  if ((r = mutex_lock(&rfp->fp_lock)) != 0)
 	panic("Failed to lock: %d", r);
  m_in = org_m_in;
  fp = org_fp;
  self = org_self;
 }
 /*===========================================================================*
 *				unlock_proc				     *
 *===========================================================================*/
 PUBLIC void unlock_proc(struct fproc *rfp)
 {
 /* Release the per-process lock of 'rfp'; a failing unlock is fatal. */
  int status = mutex_unlock(&rfp->fp_lock);
  if (status != 0) {
 	panic("Failed to unlock: %d", status);
  }
 }
 /*===========================================================================*
 *				thread_cleanup				     *
 *===========================================================================*/
 PRIVATE void thread_cleanup_f(struct fproc *rfp, char *f, int l)
 {
 /* Wrap up the work of a worker thread. 'f' and 'l' are only used in the
  * verbose message. Everything that concerns a process (lock checks, a
  * postponed PM call, releasing the process lock) is skipped when this
  * thread is not associated with a particular process, i.e., rfp is NULL.
  */
  if (verbose) printf("AVFS: thread %d is cleaning up for fp=%p (%s:%d)\n",
 			mthread_self(), rfp, f, l);
  assert(mthread_self() != -1);
  if (rfp != NULL) {
 #if LOCK_DEBUG
 	check_filp_locks_by_me();
 	check_vnode_locks_by_me(rfp);
 	check_vmnt_locks_by_me(rfp);
 #endif
 	if (rfp->fp_flags & FP_PM_PENDING) {	/* Postponed PM call */
 		m_in = rfp->fp_job.j_m_in;
 		rfp->fp_flags &= ~FP_PM_PENDING;
 		service_pm_postponed();
 	}
 #if LOCK_DEBUG
 	check_filp_locks_by_me();
 	check_vnode_locks_by_me(rfp);
 	check_vmnt_locks_by_me(rfp);
 #endif
 	unlock_proc(rfp);
  }
 #if 0
  mthread_exit(NULL);
 #endif
 }
 /*===========================================================================*
 *				get_work				     *
 *===========================================================================*/
 PRIVATE void get_work()
 {
  /* Normally wait for new input.  However, if 'reviving' is
   * nonzero, a suspended process must be awakened.
   */
  int r, found_one, proc_p;
  register struct fproc *rp;
  if (verbose) printf("VFS: get_work looking for work\n");
  while (reviving != 0) {
 	found_one = FALSE;
 	/* Find a suspended process. */
 	for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
 		if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) {
 			found_one = TRUE; /* Found a suspended process */
 			if (unblock(rp))
 				return;	/* So main loop can process job */
 			send_work();
 		}
 	if (!found_one)	/* Consistency error */
 		panic("VFS: get_work couldn't revive anyone");
  }
  for(;;) {
 	/* Normal case.  No one to revive. Get a useful request. */
 	if ((r = sef_receive(ANY, &m_in)) != OK) {
 		panic("VFS: sef_receive error: %d", r);
 	}
 	proc_p = _ENDPOINT_P(m_in.m_source);
 	if (proc_p < 0) fp = NULL;
 	else fp = &fproc[proc_p];
 	if (m_in.m_type == EDEADSRCDST) return;	/* Failed 'sendrec' */
 	if (verbose) printf("AVFS: got work from %d (fp=%p)\n", m_in.m_source,
 			    fp);
 	/* Negative who_p is never used to access the fproc array. Negative
 	 * numbers (kernel tasks) are treated in a special way.
 	 */
 	if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc)))
 		panic("receive process out of range: %d", who_p);
 	if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) {
 		printf("VFS: ignoring request from %d, endpointless slot %d (%d)\n",
 			m_in.m_source, who_p, m_in.m_type);
 		continue;
 	}
 	/* Internal consistency check; our mental image of process numbers and
 	 * endpoints must match with how the rest of the system thinks of them.
 	 */
 	if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) {
 		if (fproc[who_p].fp_endpoint == NONE)
 			printf("slot unknown even\n");
 		printf("VFS: receive endpoint inconsistent (source %d, who_p "
 			"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
 			fproc[who_p].fp_endpoint, who_e);
 		panic("VFS: inconsistent endpoint ");
 	}
 	return;
  }
 }
 /*===========================================================================*
 *				reply					     *
 *===========================================================================*/
 PUBLIC void reply(whom, result)
 int whom;			/* process to reply to */
 int result;			/* result of the call (usually OK or error #) */
 {
 /* Store 'result' as the reply type of m_out and send m_out to 'whom' with
  * sendnb(). A send that does not return OK is reported and is then fatal.
  */
  int status;
  m_out.reply_type = result;
  status = sendnb(whom, &m_out);
  if (status == OK) return;
  printf("VFS: couldn't send reply %d to %d: %d\n", result, whom, status);
  panic("Yikes %d", call_nr);
 }
 /*===========================================================================*
 *				service_pm_postponed			     *
 *===========================================================================*/
 PRIVATE void service_pm_postponed(void)
 {
 /* Carry out a PM request (PM_EXEC, PM_EXIT or PM_DUMPCORE) that is described
  * by the current m_in, build the matching reply in m_out and send it to PM.
  * Any other call number, or a failing send, results in a panic.
  */
  int r;
  vir_bytes pc;
 #if 0
  printf("executing postponed: ");
  if (call_nr == PM_EXEC)	printf("PM_EXEC");
  if (call_nr == PM_EXIT)	printf("PM_EXIT");
  if (call_nr == PM_DUMPCORE)	printf("PM_DUMPCORE");
  printf("\n");
 #endif
  switch(call_nr) {
    case PM_EXEC:
 	r = pm_exec(m_in.PM_PROC, m_in.PM_PATH, m_in.PM_PATH_LEN,
 		    m_in.PM_FRAME, m_in.PM_FRAME_LEN, &pc);
 	/* Reply status to PM, together with the pc filled in by pm_exec */
 	m_out.m_type = PM_EXEC_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	m_out.PM_PC = (void*)pc;
 	m_out.PM_STATUS = r;
 	break;
    case PM_EXIT:
 	pm_exit(m_in.PM_PROC);
 	/* Reply dummy status to PM for synchronization */
 	m_out.m_type = PM_EXIT_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_DUMPCORE:
 	/* The segment pointer from the request is not passed on; NULL is
 	 * handed to pm_dumpcore instead.
 	 */
 	r = pm_dumpcore(m_in.PM_PROC,
 			NULL /* (struct mem_map *) m_in.PM_SEGPTR */);
 	/* Reply status to PM */
 	m_out.m_type = PM_CORE_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	m_out.PM_STATUS = r;
 	break;
    default:
 	panic("Unhandled postponed PM call %d", m_in.m_type);
  }
  /* Unlike reply(), this uses send() and treats failure as fatal. */
  r = send(PM_PROC_NR, &m_out);
  if (r != OK)
 	panic("service_pm_postponed: send failed: %d", r);
 }
 /*===========================================================================*
 *				service_pm				     *
 *===========================================================================*/
 PRIVATE void service_pm()
 {
 /* Handle a request from PM found in m_in. Most requests are carried out
  * right here and answered with a reply message in m_out. PM_EXEC, PM_EXIT
  * and PM_DUMPCORE are only recorded in the target process (FP_PM_PENDING)
  * and get no reply from this function. Unknown requests are reported and
  * dropped without a reply.
  */
  int r, slot;
  if (verbose) printf("service_pm: %d (%d)\n", call_nr, mthread_self());
  switch (call_nr) {
    case PM_SETUID:
 	pm_setuid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID);
 	m_out.m_type = PM_SETUID_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_SETGID:
 	pm_setgid(m_in.PM_PROC, m_in.PM_EID, m_in.PM_RID);
 	m_out.m_type = PM_SETGID_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_SETSID:
 	pm_setsid(m_in.PM_PROC);
 	m_out.m_type = PM_SETSID_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_EXEC:
    case PM_EXIT:
    case PM_DUMPCORE:
 	okendpt(m_in.PM_PROC, &slot);
 	fp = &fproc[slot];
 	assert(!(fp->fp_flags & FP_PENDING));
 	/* Keep a copy of the request in the process slot; it is picked up
 	 * again when FP_PM_PENDING is found set for this process.
 	 */
 	fp->fp_job.j_m_in = m_in;
 	fp->fp_flags |= FP_PM_PENDING;
 #if 0
 	printf("Postponing: ");
 	if (call_nr == PM_EXEC)		printf("PM_EXEC");
 	if (call_nr == PM_EXIT)		printf("PM_EXIT");
 	if (call_nr == PM_DUMPCORE)	printf("PM_DUMPCORE");
 	printf("\n");
 #endif
        /* PM requests on behalf of a proc are handled after the system call
         * that might be in progress for that proc has finished. If the proc
         * is not busy, we start a dummy call */
 	/* The trylock only probes whether the proc lock is free; it is
 	 * released again right away before the dummy job is started.
 	 */
 	if (!(fp->fp_flags & FP_PENDING) && mutex_trylock(&fp->fp_lock) == 0) {
 		mutex_unlock(&fp->fp_lock);
 		worker_start(do_dummy);
 		yield();
        }
 	return;
    case PM_FORK:
    case PM_SRV_FORK:
 	pm_fork(m_in.PM_PPROC, m_in.PM_PROC, m_in.PM_CPID);
 	m_out.m_type = (call_nr == PM_FORK) ? PM_FORK_REPLY : PM_SRV_FORK_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_SETGROUPS:
 	pm_setgroups(m_in.PM_PROC, m_in.PM_GROUP_NO, m_in.PM_GROUP_ADDR);
 	m_out.m_type = PM_SETGROUPS_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_UNPAUSE:
 	unpause(m_in.PM_PROC);
 	m_out.m_type = PM_UNPAUSE_REPLY;
 	m_out.PM_PROC = m_in.PM_PROC;
 	break;
    case PM_REBOOT:
 	pm_reboot();
 	/* Reply dummy status to PM for synchronization */
 	m_out.m_type = PM_REBOOT_REPLY;
 	break;
    default:
 	printf("VFS: don't know how to handle PM request %d\n", call_nr);
 	return;
  }
  /* Send the reply prepared above; a failing send is fatal. */
  r = send(PM_PROC_NR, &m_out);
  if (r != OK)
 	panic("service_pm: send failed: %d", r);
 }
 /*===========================================================================*
 *				unblock					     *
 *===========================================================================*/
 PRIVATE int unblock(rfp)
 struct fproc *rfp;
 {
 /* Revive process 'rfp': make it the current process, rebuild its blocked
  * call in m_in from the values saved in the process slot and clear its
  * blocked/revived state. Returns 0 when the process was blocked on a pipe
  * (a do_pending_pipe job has been started for it) and 1 in all other cases.
  */
  int was_blocked_on = rfp->fp_blocked_on;
  fp = rfp;
  m_in.m_type = rfp->fp_block_callnr;
  m_in.fd = rfp->fp_block_fd;
  m_in.buffer = rfp->fp_buffer;
  m_in.nbytes = rfp->fp_nbytes;
  rfp->fp_flags &= ~FP_REVIVED;
  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
  reviving--;
  assert(reviving >= 0);
  /* This should be a pipe I/O, not a device I/O. If it is, it'll 'leak'
   * grants.
   */
  assert(!GRANT_VALID(rfp->fp_grant));
  if (was_blocked_on != FP_BLOCKED_ON_PIPE) {
 	return(1);	/* We've unblocked a process */
  }
  /* Pending pipe reads/writes can be handled directly */
  worker_start(do_pending_pipe);
  yield();	/* Give thread a chance to run */
  return(0);	/* Retrieve more work */
 }
--- a/servers/avfs/misc.c
+++ b/servers/avfs/misc.c
@ -0,0 +1,617 @@
 /* This file contains a collection of miscellaneous procedures.  Some of them
 * perform simple system calls.  Some others do a little part of system calls
 * that are mostly performed by the Memory Manager.
 *
 * The entry points into this file are
 *   do_dup:	  perform the DUP system call
 *   do_fcntl:	  perform the FCNTL system call
 *   do_sync:	  perform the SYNC system call
 *   do_fsync:	  perform the FSYNC system call
 *   pm_reboot:	  sync disks and prepare for shutdown
 *   pm_fork:	  adjust the tables after PM has performed a FORK system call
 *   do_exec:	  handle files with FD_CLOEXEC on after PM has done an EXEC
 *   pm_exit:	  a process has exited; note that in the tables
 *   pm_setuid:	  set real and effective uid for some process
 *   pm_setgid:	  set real and effective gid for some process
 *   do_revive:	  revive a process that was waiting for something (e.g. TTY)
 *   do_svrctl:	  file system control
 *   do_getsysinfo:	request copy of FS data structure
 *   pm_dumpcore: create a core dump
 */
 #include "fs.h"
 #include <fcntl.h>
 #include <assert.h>
 #include <unistd.h>
 #include <string.h>
 #include <minix/callnr.h>
 #include <minix/safecopies.h>
 #include <minix/endpoint.h>
 #include <minix/com.h>
 #include <minix/sysinfo.h>
 #include <minix/u64.h>
 #include <sys/ptrace.h>
 #include <sys/svrctl.h>
 #include "file.h"
 #include "fproc.h"
 #include "dmap.h"
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 #include "param.h"
 #define CORE_NAME	"core"
 #define CORE_MODE	0777	/* mode to use on core image files */
 #if ENABLE_SYSCALL_STATS
 PUBLIC unsigned long calls_stats[NCALLS];
 #endif
 FORWARD _PROTOTYPE( void free_proc, (struct fproc *freed, int flags)	);
 /*
 FORWARD _PROTOTYPE( int dumpcore, (int proc_e, struct mem_map *seg_ptr)	);
 FORWARD _PROTOTYPE( int write_bytes, (struct inode *rip, off_t off,
 				      char *buf, size_t bytes)		);
 FORWARD _PROTOTYPE( int write_seg, (struct inode *rip, off_t off, int proc_e,
 			int seg, off_t seg_off, phys_bytes seg_bytes)	);
 */
 /*===========================================================================*
 *				do_getsysinfo				     *
 *===========================================================================*/
 PUBLIC int do_getsysinfo()
 {
  vir_bytes src_addr, dst_addr;
  size_t len;
  /* Only su may call do_getsysinfo. This call may leak information (and is not
   * stable enough to be part of the API/ABI).
   */
  if (!super_user) return(EPERM);
  /* This call should no longer be used by user applications. In the future,
   * requests from non-system processes should be denied. For now, just warn.
   */
  if (call_nr == GETSYSINFO) {
 	printf("VFS: obsolete call of do_getsysinfo() by proc %d\n",
 		fp->fp_endpoint);
  }
  switch(m_in.info_what) {
    case SI_PROC_TAB:
 	src_addr = (vir_bytes) fproc;
 	len = sizeof(struct fproc) * NR_PROCS;
 	break;
    case SI_DMAP_TAB:
 	src_addr = (vir_bytes) dmap;
 	len = sizeof(struct dmap) * NR_DEVICES;
 	break;
 #if ENABLE_SYSCALL_STATS
    case SI_CALL_STATS:
 	src_addr = (vir_bytes) calls_stats;
 	len = sizeof(calls_stats);
 	break;
 #endif
    default:
 	return(EINVAL);
  }
  dst_addr = (vir_bytes) m_in.info_where;
  return sys_datacopy(SELF, src_addr, who_e, dst_addr, len);
 }
 /*===========================================================================*
 *				do_dup					     *
 *===========================================================================*/
 PUBLIC int do_dup()
 {
 /* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are
  * obsolete.  In fact, it is not even possible to invoke them using the
  * current library because the library routines call fcntl().  They are
  * provided to permit old binary programs to continue to run.
  *
  * Input is taken from m_in: 'fd' holds the old descriptor, with DUP_MASK set
  * for dup2, and 'fd2' the requested new descriptor for dup2. Returns the new
  * descriptor on success; otherwise err_code when the old descriptor cannot
  * be obtained, EBADF for an out-of-range fd2, or the get_fd() error.
  */
  register int rfd;
  register struct filp *f;
  int r = OK;
  /* Is the file descriptor valid? */
  rfd = m_in.fd & ~DUP_MASK;		/* kill off dup2 bit, if on */
  if ((f = get_filp(rfd, VNODE_READ)) == NULL) return(err_code);
  /* Distinguish between dup and dup2. */
  if (m_in.fd == rfd) {			/* bit not on */
 	/* dup(fd) */
 	r = get_fd(0, 0, &m_in.fd2, NULL);
  } else {
 	/* dup2(old_fd, new_fd) */
 	if (m_in.fd2 < 0 || m_in.fd2 >= OPEN_MAX) {
 		r = EBADF;
 	} else if (rfd == m_in.fd2) {
 		/* Ignore the call: dup2(x, x). Return right here; for x == 0
 		 * the result equals OK and must not be mistaken for the
 		 * "install a new descriptor" case below, which would add a
 		 * reference to the filp.
 		 */
 		unlock_filp(f);
 		return(m_in.fd2);
 	} else {
 		/* All is fine, close new_fd if necessary */
 		m_in.fd = m_in.fd2;	/* prepare to close fd2 */
 		unlock_filp(f);		/* or it might deadlock on do_close */
 		(void) do_close();	/* cannot fail */
 		f = get_filp(rfd, VNODE_READ); /* lock old_fd again */
 		if (f == NULL) return(err_code);	/* nothing is locked */
 	}
  }
  if (r == OK) {
 	/* Success. Set up new file descriptors. */
 	f->filp_count++;
 	fp->fp_filp[m_in.fd2] = f;
 	FD_SET(m_in.fd2, &fp->fp_filp_inuse);
 	r = m_in.fd2;
  }
  unlock_filp(f);
  return(r);
 }
 /*===========================================================================*
 *				do_fcntl				     *
 *===========================================================================*/
 PUBLIC int do_fcntl()
 {
 /* Perform the fcntl(fd, request, ...) system call. The descriptor is taken
  * from m_in.fd and the request from m_in.request. The filp is obtained with
  * a write lock for F_FREESP and a read lock for everything else, and is
  * unlocked again before returning. Returns the request specific result, or
  * err_code when the descriptor cannot be obtained, or EINVAL for unknown
  * requests.
  */
  register struct filp *f;
  int new_fd, fl, r = OK;
  tll_access_t locktype;
  /* Is the file descriptor valid? */
  locktype = (m_in.request == F_FREESP) ? VNODE_WRITE : VNODE_READ;
  if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code);
  switch (m_in.request) {
    case F_DUPFD:
 	/* This replaces the old dup() system call. */
 	if (m_in.addr < 0 || m_in.addr >= OPEN_MAX) r = EINVAL;
 	else if ((r = get_fd(m_in.addr, 0, &new_fd, NULL)) == OK) {
 		f->filp_count++;
 		fp->fp_filp[new_fd] = f;
 		/* Mark the descriptor as in use, just like do_dup does. */
 		FD_SET(new_fd, &fp->fp_filp_inuse);
 		r = new_fd;
 	}
 	break;
    case F_GETFD:
 	/* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
 	r = FD_ISSET(m_in.fd, &fp->fp_cloexec_set) ? FD_CLOEXEC : 0;
 	break;
    case F_SETFD:
 	/* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
 	if(m_in.addr & FD_CLOEXEC)
 		FD_SET(m_in.fd, &fp->fp_cloexec_set);
 	else
 		FD_CLR(m_in.fd, &fp->fp_cloexec_set);
 	break;
    case F_GETFL:
 	/* Get file status flags (O_NONBLOCK and O_APPEND). */
 	fl = f->filp_flags & (O_NONBLOCK | O_APPEND | O_ACCMODE);
 	r = fl;
 	break;
    case F_SETFL:
 	/* Set file status flags (O_NONBLOCK and O_APPEND). */
 	fl = O_NONBLOCK | O_APPEND | O_REOPEN;
 	f->filp_flags = (f->filp_flags & ~fl) | (m_in.addr & fl);
 	break;
    case F_GETLK:
    case F_SETLK:
    case F_SETLKW:
 	/* Set or clear a file lock. */
 	r = lock_op(f, m_in.request);
 	break;
    case F_FREESP:
     {
 	/* Free a section of a file. Preparation is done here, actual freeing
 	 * in freesp_inode().
 	 */
 	off_t start, end;
 	struct flock flock_arg;
 	signed long offset;
 	/* Check if it's a regular file. */
 	if ((f->filp_vno->v_mode & I_TYPE) != I_REGULAR) r = EINVAL;
 	else if (!(f->filp_mode & W_BIT)) r = EBADF;
 	else
 		/* Copy flock data from userspace. */
 		r = sys_datacopy(who_e, (vir_bytes) m_in.name1, SELF,
 				 (vir_bytes) &flock_arg,
 				 (phys_bytes) sizeof(flock_arg));
 	if (r != OK) break;
 	/* Convert starting offset to signed. */
 	offset = (signed long) flock_arg.l_start;
 	/* Figure out starting position base. */
 	switch(flock_arg.l_whence) {
 	  case SEEK_SET: start = 0; break;
 	  case SEEK_CUR:
 		if (ex64hi(f->filp_pos) != 0)
 			panic("do_fcntl: position in file too high");
 		start = ex64lo(f->filp_pos);
 		break;
 	  case SEEK_END: start = f->filp_vno->v_size; break;
 	  default: r = EINVAL;
 	}
 	if (r != OK) break;
 	/* Check for overflow or underflow. */
 	if (offset > 0 && start + offset < start) r = EINVAL;
 	else if (offset < 0 && start + offset > start) r = EINVAL;
 	else {
 		start += offset;
 		if (start < 0) r = EINVAL;
 	}
 	if (r != OK) break;
 	/* A nonzero length frees the range [start, end), with 'end' clipped
 	 * to the file size; a zero length is passed on with end == 0.
 	 */
 	if (flock_arg.l_len != 0) {
 		if (start >= f->filp_vno->v_size) r = EINVAL;
 		else if ((end = start + flock_arg.l_len) <= start) r = EINVAL;
 		else if (end > f->filp_vno->v_size) end = f->filp_vno->v_size;
 	} else {
 		end = 0;
 	}
 	if (r != OK) break;
 	r = req_ftrunc(f->filp_vno->v_fs_e, f->filp_vno->v_inode_nr,start,end);
 	/* With a zero length the file now ends at 'start'. */
 	if (r == OK && flock_arg.l_len == 0)
 		f->filp_vno->v_size = start;
 	break;
     }
    default:
 	r = EINVAL;
  }
  unlock_filp(f);
  return(r);
 }
 /*===========================================================================*
 *				do_sync					     *
 *===========================================================================*/
 PUBLIC int do_sync()
 {
 /* Perform the sync() system call: send a sync request to the file server
  * of every vmnt entry that has both a device and a file server endpoint.
  * Each entry is locked exclusively while it is looked at. Returns OK.
  */
  int i;
  struct vmnt *vmp;
  for (i = 0; i < NR_MNTS; i++) {
 	vmp = &vmnt[i];
 	lock_vmnt(vmp, VMNT_EXCL);
 	if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE) {
 		req_sync(vmp->m_fs_e);
 	}
 	unlock_vmnt(vmp);
  }
  return(OK);
 }
 /*===========================================================================*
 *				do_fsync				     *
 *===========================================================================*/
 PUBLIC int do_fsync()
 {
 /* Perform the fsync() system call. For now, don't be unnecessarily smart:
  * a sync request goes to the file server of every vmnt entry whose device
  * equals the device of the file behind descriptor m_in.m1_i1. Returns
  * err_code when the descriptor cannot be obtained, OK otherwise.
  */
  struct filp *rfilp;
  struct vmnt *vmp;
  dev_t dev;
  int i;
  rfilp = get_filp(m_in.m1_i1, VNODE_READ);
  if (rfilp == NULL) return(err_code);
  dev = rfilp->filp_vno->v_dev;
  for (i = 0; i < NR_MNTS; i++) {
 	vmp = &vmnt[i];
 	lock_vmnt(vmp, VMNT_EXCL);
 	if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && vmp->m_fs_e != NONE) {
 		req_sync(vmp->m_fs_e);
 	}
 	unlock_vmnt(vmp);
  }
  unlock_filp(rfilp);
  return(OK);
 }
 /*===========================================================================*
 *				pm_reboot				     *
 *===========================================================================*/
 PUBLIC void pm_reboot()
 {
  /* Perform the VFS side of the reboot call. */
  int i;
  struct fproc *rfp;
  do_sync();
  /* Do exit processing for all leftover processes and servers,
   * but don't actually exit them (if they were really gone, PM
   * will tell us about it).
   */
  for (i = 0; i < NR_PROCS; i++) {
 	/* Don't just free the proc right away, but let it finish what it was
 	 * doing first */
 	rfp = &fproc[i];
 	if (rfp->fp_endpoint != NONE) {
 		lock_proc(rfp, 0);
 		free_proc(rfp, 0);
 		unlock_proc(rfp);
 	}
  }
  unmount_all();
 }
 /*===========================================================================*
 *				pm_fork					     *
 *===========================================================================*/
 PUBLIC void pm_fork(pproc, cproc, cpid)
 int pproc;	/* Parent process */
 int cproc;	/* Child process */
 int cpid;	/* Child process id */
 {
 /* Perform those aspects of the fork() system call that relate to files.
  * In particular, let the child inherit its parent's file descriptors.
  * The parent and child parameters tell who forked off whom. The file
  * system uses the same slot numbers as the kernel.  Only PM makes this call.
  */
  register struct fproc *cp, *pp;
  int fd, parentno, childno;
  mutex_t saved_lock;
  /* Check up-to-dateness of fproc. */
  okendpt(pproc, &parentno);
  /* PM gives child endpoint, which implies process slot information.
   * Don't call isokendpt, because that will verify if the endpoint
   * number is correct in fproc, which it won't be.
   */
  childno = _ENDPOINT_P(cproc);
  if (childno < 0 || childno >= NR_PROCS)
 	panic("VFS: bogus child for forking: %d", m_in.child_endpt);
  cp = &fproc[childno];
  pp = &fproc[parentno];
  if (cp->fp_pid != PID_FREE)
 	panic("VFS: forking on top of in-use child: %d", childno);
  /* Copy the parent's fproc struct to the child. However, the mutex
   * variables belong to a slot and must stay the same.
   */
  saved_lock = cp->fp_lock;
  *cp = *pp;
  cp->fp_lock = saved_lock;
  /* Increase the counters in the 'filp' table. */
  for (fd = 0; fd < OPEN_MAX; fd++) {
 	if (cp->fp_filp[fd] == NULL) continue;
 	cp->fp_filp[fd]->filp_count++;
  }
  /* Fill in new process and endpoint id. */
  cp->fp_pid = cpid;
  cp->fp_endpoint = cproc;
  /* A forking process never has an outstanding grant, as it isn't blocking on
   * I/O. */
  if (GRANT_VALID(pp->fp_grant)) {
 	panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp->fp_endpoint,
 	       pp->fp_grant);
  }
  if (GRANT_VALID(cp->fp_grant)) {
 	panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp->fp_endpoint,
 	       cp->fp_grant);
  }
  /* A child is not a process leader, not being revived, etc. */
  cp->fp_flags = FP_NOFLAGS;
  /* Record the fact that both root and working dir have another user. */
  if (cp->fp_rd != NULL) dup_vnode(cp->fp_rd);
  if (cp->fp_wd != NULL) dup_vnode(cp->fp_wd);
 }
 /*===========================================================================*
 *				free_proc				     *
 *===========================================================================*/
 PRIVATE void free_proc(struct fproc *exiter, int flags)
 {
 /* Release the file system resources of process 'exiter': unpause it if it
  * is blocked, close all of its file descriptors and drop its root and
  * working directory. When 'flags' contains FP_EXITING the slot itself is
  * given up as well: the endpoint is invalidated, a session leader's
  * controlling tty is revoked from all processes, and the slot is marked
  * free. Calling this for a slot without endpoint is fatal.
  */
  int i;
  register struct fproc *rfp;
  register struct filp *rfilp;
  register struct vnode *vp;
  dev_t dev;
  if (exiter->fp_endpoint == NONE)
 	panic("free_proc: already free");
  if (fp_is_blocked(exiter))
 	unpause(exiter->fp_endpoint);
  /* Loop on file descriptors, closing any that are open. */
  for (i = 0; i < OPEN_MAX; i++) {
 	(void) close_fd(exiter, i);
  }
  /* Check if any process is SUSPENDed on this driver.
   * If a driver exits, unmap its entries in the dmap table.
   * (unmapping has to be done after the first step, because the
   * dmap table is used in the first step.)
   */
  unsuspend_by_endpt(exiter->fp_endpoint);
  /* Release root and working directories. */
  if (exiter->fp_rd) { put_vnode(exiter->fp_rd); exiter->fp_rd = NULL; }
  if (exiter->fp_wd) { put_vnode(exiter->fp_wd); exiter->fp_wd = NULL; }
  /* The rest of these actions is only done when processes actually exit. */
  if (!(flags & FP_EXITING)) return;
  /* Invalidate endpoint number for error and sanity checks. */
  exiter->fp_endpoint = NONE;
  exiter->fp_flags |= FP_EXITING;
  /* If a session leader exits and it has a controlling tty, then revoke
   * access to its controlling tty from all other processes using it.
   */
  if ((exiter->fp_flags & FP_SESLDR) && exiter->fp_tty != 0) {
      dev = exiter->fp_tty;
      for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 	  if(rfp->fp_pid == PID_FREE) continue;
          if (rfp->fp_tty == dev) rfp->fp_tty = 0;
          /* Close every open character special file that refers to the tty
           * and leave the filp behind marked FILP_CLOSED.
           */
          for (i = 0; i < OPEN_MAX; i++) {
 		if ((rfilp = rfp->fp_filp[i]) == NULL) continue;
 		if (rfilp->filp_mode == FILP_CLOSED) continue;
 		vp = rfilp->filp_vno;
 		if ((vp->v_mode & I_TYPE) != I_CHAR_SPECIAL) continue;
 		if ((dev_t) vp->v_sdev != dev) continue;
 		lock_filp(rfilp, VNODE_READ);
 		/* 'rfilp-filp' is the index of this filp in the filp table */
 		(void) dev_close(dev, rfilp-filp); /* Ignore any errors, even
 						    * SUSPEND. */
 		rfilp->filp_mode = FILP_CLOSED;
 		unlock_filp(rfilp);
          }
      }
  }
  /* Exit done. Mark slot as free. */
  exiter->fp_pid = PID_FREE;
  if (exiter->fp_flags & FP_PENDING)
 	pending--;	/* No longer pending job, not going to do it */
  exiter->fp_flags = FP_NOFLAGS;
 }
 /*===========================================================================*
 *				pm_exit					     *
 *===========================================================================*/
 PUBLIC void pm_exit(proc)
 int proc;
 {
 /* Perform the file system portion of the exit(status) system call for the
  * process with endpoint 'proc'.
  */
  int slot;
  okendpt(proc, &slot);
  /* Nevertheless, pretend that the call came from the user. */
  fp = &fproc[slot];
  free_proc(fp, FP_EXITING);
 }
 /*===========================================================================*
 *				pm_setgid				     *
 *===========================================================================*/
 PUBLIC void pm_setgid(proc_e, egid, rgid)
 int proc_e;
 int egid;
 int rgid;
 {
 /* Store the real gid 'rgid' and the effective gid 'egid' in the process
  * slot that belongs to endpoint 'proc_e'.
  */
  int slot;
  register struct fproc *rfp;
  okendpt(proc_e, &slot);
  rfp = &fproc[slot];
  rfp->fp_realgid = rgid;
  rfp->fp_effgid = egid;
 }
 /*===========================================================================*
 *				pm_setgroups				     *
 *===========================================================================*/
 PUBLIC void pm_setgroups(proc_e, ngroups, groups)
 int proc_e;
 int ngroups;
 gid_t *groups;
 {
 /* Copy 'ngroups' group ids from address 'groups' in the calling process
  * into the fp_sgroups array of the process with endpoint 'proc_e' and
  * record their number. A list that does not fit, or a failing copy, is
  * fatal.
  */
  int slot;
  struct fproc *rfp;
  okendpt(proc_e, &slot);
  rfp = &fproc[slot];
  if (ngroups * sizeof(gid_t) > sizeof(rfp->fp_sgroups)) {
 	panic("VFS: pm_setgroups: too much data to copy");
  }
  if (sys_datacopy(who_e, (vir_bytes) groups, SELF, (vir_bytes) rfp->fp_sgroups,
 		   ngroups * sizeof(gid_t)) != OK) {
 	panic("VFS: pm_setgroups: datacopy failed");
  } else {
 	rfp->fp_ngroups = ngroups;
  }
 }
 /*===========================================================================*
 *				pm_setuid				     *
 *===========================================================================*/
 PUBLIC void pm_setuid(proc_e, euid, ruid)
 int proc_e;
 int euid;
 int ruid;
 {
 /* Store the real uid 'ruid' and the effective uid 'euid' in the process
  * slot that belongs to endpoint 'proc_e'.
  */
  int slot;
  struct fproc *rfp;
  okendpt(proc_e, &slot);
  rfp = &fproc[slot];
  rfp->fp_realuid = ruid;
  rfp->fp_effuid = euid;
 }
 /*===========================================================================*
 *				do_svrctl				     *
 *===========================================================================*/
 PUBLIC int do_svrctl()
 {
 /* File system control. The request in m_in.svrctl_req is looked at, but no
  * control request is implemented yet, so the result is always EINVAL.
  */
  switch (m_in.svrctl_req) {
    default:
 	/* Unknown (that is: any) request. */
 	return(EINVAL);
  }
 }
 /*===========================================================================*
 *				pm_dumpcore				     *
 *===========================================================================*/
 PUBLIC int pm_dumpcore(proc_e, seg_ptr)
 int proc_e;
 struct mem_map *seg_ptr;
 {
 /* Run the exit processing (free_proc with FP_EXITING) for the process with
  * endpoint 'proc_e'. 'seg_ptr' is not used here. Returns OK.
  */
  int dumper_slot;
  struct fproc *rfp;
  okendpt(proc_e, &dumper_slot);
  rfp = &fproc[dumper_slot];
  free_proc(rfp, FP_EXITING);
  return(OK);
 }
 /*===========================================================================*
 *				 ds_event				     *
 *===========================================================================*/
 PUBLIC void ds_event()
 {
 /* Handle an event from DS. The key, its type and its owner are fetched with
  * ds_check() and the value with ds_retrieve_u32(). Only keys that start with
  * the VFS driver prefix and carry the value DS_DRIVER_UP are acted upon, by
  * calling dmap_endpt_up() for the owner. Failures are reported (ENOENT from
  * ds_check silently) and end the handling of the event.
  */
  char key[DS_MAX_KEYLEN];
  char *drv_prefix = "drv.vfs.";
  u32_t value;
  int type, r;
  endpoint_t owner_endpoint;
  /* Get the event and the owner from DS. */
  if ((r = ds_check(key, &type, &owner_endpoint)) != OK) {
 	if(r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r);
 	return;
  }
  if ((r = ds_retrieve_u32(key, &value)) != OK) {
 	printf("VFS: ds_event: ds_retrieve_u32 failed\n");
 	return;
  }
  /* Only check for VFS driver up events. The whole prefix has to match, so
   * compare strlen(drv_prefix) characters; sizeof(drv_prefix) would merely be
   * the size of the pointer.
   */
  if (strncmp(key, drv_prefix, strlen(drv_prefix)) || value != DS_DRIVER_UP)
 	return;
  /* Perform up. */
  dmap_endpt_up(owner_endpoint);
 }
--- a/servers/avfs/mount.c
+++ b/servers/avfs/mount.c
@ -0,0 +1,605 @@
 /* This file performs the MOUNT and UMOUNT system calls.
 *
 * The entry points into this file are
 *   do_fsready:	perform the FS_READY system call
 *   do_mount:		perform the MOUNT system call
 *   do_umount:		perform the UMOUNT system call
 *   unmount:		unmount a file system
 */
 #include "fs.h"
 #include <fcntl.h>
 #include <string.h>
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include <minix/keymap.h>
 #include <minix/const.h>
 #include <minix/endpoint.h>
 #include <minix/syslib.h>
 #include <minix/bitmap.h>
 #include <minix/ds.h>
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <dirent.h>
 #include <assert.h>
 #include "file.h"
 #include "fproc.h"
 #include "dmap.h"
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 #include "path.h"
 #include "param.h"
 /* Allow the root to be replaced before the first 'real' mount. This counter
  * is incremented by mount_fs() each time it mounts a root file system;
  * mount_fs() treats a mount on "/" as a root mount only while the counter is
  * below 2.
  */
 PRIVATE int have_root = 0;
 /* Bitmap of in-use "none" pseudo devices. Bit (minor - 1) represents the
  * pseudo device with that minor number.
  */
 PRIVATE bitchunk_t nonedev[BITMAP_CHUNKS(NR_NONEDEVS)] = { 0 };
 /* Mark the "none" pseudo device 'dev' as in use (alloc_nonedev) or as free
  * again (free_nonedev).
  */
 #define alloc_nonedev(dev) SET_BIT(nonedev, minor(dev) - 1)
 #define free_nonedev(dev) UNSET_BIT(nonedev, minor(dev) - 1)
 FORWARD _PROTOTYPE( dev_t name_to_dev, (int allow_mountpt,
 					char path[PATH_MAX+1])		);
 FORWARD _PROTOTYPE( int is_nonedev, (dev_t dev)				);
 FORWARD _PROTOTYPE( dev_t find_free_nonedev, (void)			);
 FORWARD _PROTOTYPE( void update_bspec, (dev_t dev, endpoint_t fs_e,
 				      int send_drv_e)			);
 /*===========================================================================*
 *				update_bspec				     *
 *===========================================================================*/
 PRIVATE void update_bspec(dev_t dev, endpoint_t fs_e, int send_drv_e)
 {
 /* Walk the vnode table and make every in-use block special file that refers
  * to 'dev' route its raw block I/O through FS endpoint 'fs_e'. When
  * 'send_drv_e' is nonzero, additionally tell 'fs_e' which driver endpoint
  * serves the device, using req_newdriver().
  */
  struct vnode *node;
  struct dmap *drv;
  int maj, res;
  maj = major(dev);
  for (node = &vnode[0]; node < &vnode[NR_VNODES]; ++node) {
 	/* Skip everything that is not an open block special file on 'dev'. */
 	if (node->v_ref_count <= 0) continue;
 	if (!S_ISBLK(node->v_mode)) continue;
 	if (node->v_sdev != dev) continue;
 	node->v_bfs_e = fs_e;
 	if (!send_drv_e) continue;
 	/* Can't update driver endpoint for out of range major */
 	if (maj < 0 || maj >= NR_DEVICES) continue;
 	drv = &dmap[maj];
 	if (drv->dmap_driver == NONE) {
 		/* Can't send new driver endpoint for vanished driver */
 		printf("VFS: can't send new driver endpt\n");
 		continue;
 	}
 	res = req_newdriver(fs_e, node->v_sdev, drv->dmap_driver);
 	if (res != OK) {
 		printf("VFS: Failed to send new driver endpoint"
 		       " for moved block special file\n");
 	}
  }
 }
 /*===========================================================================*
 *                              do_fsready                                   *
 *===========================================================================*/
 PUBLIC int do_fsready()
 {
 /* Deprecated FS_READY call: performs no work and always answers SUSPEND. */
  const int reply = SUSPEND;
  return(reply);
 }
 /*===========================================================================*
 *                              do_mount                                     *
 *===========================================================================*/
 PUBLIC int do_mount()
 {
 /* Perform the mount(name, mfile, mount_flags) system call.
  *
  * The FS endpoint comes either from a label that is looked up in DS (when
  * MS_LABEL16 is set) or, for legacy callers, directly from the request. The
  * device is taken from the block special file named by name1, or is a free
  * "none" pseudo device when name1 is empty. The real work is done by
  * mount_fs(), whose result is returned.
  */
  char mount_label[LABEL_MAX];
  char fullpath[PATH_MAX+1];
  endpoint_t fs_e;
  dev_t dev;
  int r, slot, rdonly, nodev;
  /* Only the super-user may do MOUNT. */
  if (!super_user) return(EPERM);
  /* FS process' endpoint number */
  if (!(m_in.mount_flags & MS_LABEL16)) {
 	/* Legacy support: get the endpoint from the request itself. */
 	fs_e = (endpoint_t) m_in.fs_label;
 	mount_label[0] = 0;
  } else {
 	/* Get the label from the caller, and ask DS for the endpoint. */
 	r = sys_datacopy(who_e, (vir_bytes) m_in.fs_label, SELF,
 		(vir_bytes) mount_label, (phys_bytes) sizeof(mount_label));
 	if (r != OK) return(r);
 	/* Make sure the copied label is terminated. */
 	mount_label[sizeof(mount_label)-1] = 0;
 	r = ds_retrieve_label_endpt(mount_label, &fs_e);
 	if (r != OK) return(r);
  }
  /* Sanity check on process number. */
  if (isokendpt(fs_e, &slot) != OK) return(EINVAL);
  /* Should the file system be mounted read-only? */
  rdonly = (m_in.mount_flags & MS_RDONLY);
  /* A null string for block special device means don't use a device at all. */
  nodev = (m_in.name1_length == 0);
  if (nodev) {
 	/* Find a free pseudo-device as substitute for an actual device. */
 	dev = find_free_nonedev();
 	if (dev == NO_DEV) return(err_code);
  } else {
 	/* If 'name' is not for a block special file, return error. */
 	if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 		return(err_code);
 	dev = name_to_dev(FALSE /*allow_mountpt*/, fullpath);
 	if (dev == NO_DEV) return(err_code);
  }
  /* Fetch the name of the mountpoint; this reuses the fullpath buffer. */
  if (fetch_name(m_in.name2, m_in.name2_length, M1, fullpath) != OK)
 	return(err_code);
  /* Do the actual job */
  return mount_fs(dev, fullpath, fs_e, rdonly, mount_label);
 }
 /*===========================================================================*
 *                              mount_fs				     *
 *===========================================================================*/
 PUBLIC int mount_fs(
 dev_t dev,
 char mountpoint[PATH_MAX+1],
 endpoint_t fs_e,
 int rdonly,
 char mount_label[LABEL_MAX] )
 {
 /* Mount the file system served by endpoint 'fs_e' for device 'dev' on the
  * path 'mountpoint'.
  *
  * dev:         device to mount; may be a "none" pseudo device
  * mountpoint:  path name to mount on; "/" selects a root mount while fewer
  *              than two root mounts have been performed
  * fs_e:        endpoint of the file server that handles the device
  * rdonly:      nonzero to mount read-only
  * mount_label: label stored in the vmnt entry
  *
  * Returns OK on success. Errors visible here: EINVAL (no driver for 'dev'),
  * EBUSY (device already mounted, or mountpoint vnode in use), ENOMEM (no
  * free vmnt), EISDIR (directory mounted on a non-directory), err_code when
  * the lookup fails or no free vnode is available, or the error returned by
  * req_mountpoint() or req_readsuper().
  */
  int rdir, mdir;               /* TRUE iff {root|mount} file is dir */
  int i, r = OK, found, isroot, mount_root;
  struct fproc *tfp;
  struct dmap *dp;
  struct vnode *root_node, *vp = NULL, *bspec;
  struct vmnt *new_vmp, *parent_vmp;
  char *label;
  struct node_details res;
  struct lookup resolve;
  /* Look up block device driver label when dev is not a pseudo-device */
  label = "";
  if (!is_nonedev(dev)) {
 	/* Get driver process' endpoint */
 	dp = &dmap[major(dev)];
 	if (dp->dmap_driver == NONE) {
 		printf("VFS: no driver for dev %d\n", dev);
 		return(EINVAL);
 	}
 	label = dp->dmap_label;
 	assert(strlen(label) > 0);
  }
  lock_bsf();
  /* Check whether there is a block special file open which uses the
   * same device (partition). Only block special vnodes carry a meaningful
   * v_bfs_e, which names the FS that performs block I/O for the device (see
   * update_bspec); that FS is the one that has to flush.
   */
  for (bspec = &vnode[0]; bspec < &vnode[NR_VNODES]; ++bspec) {
 	if (bspec->v_ref_count > 0 && S_ISBLK(bspec->v_mode) &&
 	    bspec->v_sdev == dev) {
 		/* Found, flush and invalidate any blocks for this device. */
 		req_flush(bspec->v_bfs_e, dev);
 		break;
 	}
  }
  /* Scan vmnt table to see if dev already mounted. If not, find a free slot.*/
  found = FALSE;
  for (i = 0; i < NR_MNTS; ++i) {
 	if (vmnt[i].m_dev == dev) found = TRUE;
  }
  if (found) {
 	unlock_bsf();
 	return(EBUSY);
  } else if ((new_vmp = get_free_vmnt()) == NULL) {
 	unlock_bsf();
 	return(ENOMEM);
  }
  lock_vmnt(new_vmp, VMNT_EXCL);
  isroot = (strcmp(mountpoint, "/") == 0);
  mount_root = (isroot && have_root < 2); /* Root can be mounted twice:
 					   * 1: ramdisk
 					   * 2: boot disk (e.g., harddisk)
 					   */
  if (!mount_root) {
 	/* Get vnode of mountpoint */
 	lookup_init(&resolve, mountpoint, PATH_NOFLAGS, &parent_vmp, &vp);
 	resolve.l_vmnt_lock = VMNT_EXCL;
 	resolve.l_vnode_lock = VNODE_WRITE;
 	if ((vp = eat_path(&resolve, fp)) == NULL)
 		r = err_code;
 	else if (vp->v_ref_count == 1) {
 		/*Tell FS on which vnode it is mounted (glue into mount tree)*/
 		r = req_mountpoint(vp->v_fs_e, vp->v_inode_nr);
 	} else
 		r = EBUSY;
 	if (r != OK) {
 		if (vp != NULL) {
 			unlock_vnode(vp);
 			unlock_vmnt(parent_vmp);
 			put_vnode(vp);
 		}
 		unlock_vmnt(new_vmp);
 		unlock_bsf();
 		return(r);
 	}
  }
 /* XXX: move this upwards before lookup after proper locking. */
  /* We'll need a vnode for the root inode */
  if ((root_node = get_free_vnode()) == NULL) {
 	if (vp != NULL) {
 		unlock_vnode(vp);
 		unlock_vmnt(parent_vmp);
 		put_vnode(vp);
 	}
 	unlock_vmnt(new_vmp);
 	unlock_bsf();
 	return(err_code);
  }
  lock_vnode(root_node, VNODE_OPCL);
  /* Store some essential vmnt data first */
  new_vmp->m_fs_e = fs_e;
  new_vmp->m_dev = dev;
  if (rdonly) new_vmp->m_flags |= VMNT_READONLY;
  else new_vmp->m_flags &= ~VMNT_READONLY;
  /* Tell FS which device to mount */
  if ((r = req_readsuper(fs_e, label, dev, rdonly, isroot, &res)) != OK) {
 	if (vp != NULL) {
 		unlock_vnode(vp);
 		unlock_vmnt(parent_vmp);
 		put_vnode(vp);
 	}
 	/* Give the vmnt slot back. */
 	new_vmp->m_fs_e = NONE;
 	new_vmp->m_dev = NO_DEV;
 	unlock_vnode(root_node);
 	unlock_vmnt(new_vmp);
 	unlock_bsf();
 	return(r);
  }
  /* Fill in root node's fields */
  root_node->v_fs_e = res.fs_e;
  root_node->v_inode_nr = res.inode_nr;
  root_node->v_mode = res.fmode;
  root_node->v_uid = res.uid;
  root_node->v_gid = res.gid;
  root_node->v_size = res.fsize;
  root_node->v_sdev = NO_DEV;
  root_node->v_fs_count = 1;
  root_node->v_ref_count = 1;
  /* Root node is indeed on the partition */
  root_node->v_vmnt = new_vmp;
  root_node->v_dev = new_vmp->m_dev;
  if(mount_root) {
 	/* Superblock and root node already read.
 	 * Nothing else can go wrong. Perform the mount. */
 	new_vmp->m_root_node = root_node;
 	new_vmp->m_mounted_on = NULL;
 	strcpy(new_vmp->m_label, mount_label);
 	if (is_nonedev(dev)) alloc_nonedev(dev);
 	update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */);
 	ROOT_DEV = dev;
 	ROOT_FS_E = fs_e;
 	/* Replace all root and working directories */
 	for (i = 0, tfp = fproc; i < NR_PROCS; i++, tfp++) {
 		if (tfp->fp_pid == PID_FREE)
 			continue;
 #define		MAKEROOT(what) { 			\
 			if (what) put_vnode(what);	\
 			dup_vnode(root_node);		\
 			what = root_node;		\
 		}
 		MAKEROOT(tfp->fp_rd);
 		MAKEROOT(tfp->fp_wd);
 	}
 	unlock_vnode(root_node);
 	unlock_vmnt(new_vmp);
 	have_root++; /* We have a (new) root */
 	unlock_bsf();
 	return(OK);
  }
  /* File types may not conflict. */
  mdir = ((vp->v_mode & I_TYPE) == I_DIRECTORY); /*TRUE iff dir*/
  rdir = ((root_node->v_mode & I_TYPE) == I_DIRECTORY);
  if (!mdir && rdir) r = EISDIR;
  /* If error, return the super block and both inodes; release the vmnt. */
  if (r != OK) {
 	unlock_vnode(vp);
 	unlock_vmnt(parent_vmp);
 	unlock_vnode(root_node);
 	unlock_vmnt(new_vmp);
 	put_vnode(vp);
 	put_vnode(root_node);
 	new_vmp->m_dev = NO_DEV;
 	unlock_bsf();
 	return(r);
  }
  /* Nothing else can go wrong.  Perform the mount. */
  new_vmp->m_mounted_on = vp;
  new_vmp->m_root_node = root_node;
  strcpy(new_vmp->m_label, mount_label);
  /* Allocate the pseudo device that was found, if not using a real device. */
  if (is_nonedev(dev)) alloc_nonedev(dev);
  /* The new FS will handle block I/O requests for its device now. */
  update_bspec(dev, fs_e, 0 /* Don't send new driver endpoint */);
  unlock_vnode(vp);
  unlock_vmnt(parent_vmp);
  unlock_vnode(root_node);
  unlock_vmnt(new_vmp);
  unlock_bsf();
  return(r);
 }
 /*===========================================================================*
 *				mount_pfs				     *
 *===========================================================================*/
 PUBLIC void mount_pfs(void)
 {
 /* Give the Pipe File Server a vmnt entry. PFS is not really mounted onto the
  * file system, but having a vmnt entry for it makes locking easier. Panics
  * when no free "none" device or no free vmnt slot is available.
  */
  struct vmnt *slot;
  dev_t pseudo;
  pseudo = find_free_nonedev();
  if (pseudo == NO_DEV)
 	panic("VFS: no nonedev to initialize PFS");
  slot = get_free_vmnt();
  if (slot == NULL)
 	panic("VFS: no vmnt to initialize PFS");
  /* Claim the pseudo device and fill in the vmnt entry. */
  alloc_nonedev(pseudo);
  slot->m_fs_e = PFS_PROC_NR;
  slot->m_dev = pseudo;
  strcpy(slot->m_label, "pfs");
 }
 /*===========================================================================*
 *                              do_umount                                    *
 *===========================================================================*/
 PUBLIC int do_umount(void)
 {
 /* Perform the umount(name) system call. 'name' may be a block special file
  * or the root of a mounted file system. On success the label of the
  * unmounted file system is copied into the reply message.
  */
  char fullpath[PATH_MAX+1];
  char label[LABEL_MAX];
  dev_t dev;
  int r;
  /* Only the super-user may do umount. */
  if (!super_user) return(EPERM);
  /* If 'name' is not for a block special file or mountpoint, return error. */
  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
 	return(err_code);
  dev = name_to_dev(TRUE /*allow_mountpt*/, fullpath);
  if (dev == NO_DEV) return(err_code);
  r = unmount(dev, label);
  if (r != OK) return(r);
  /* Return the label of the mounted file system, so that the caller
   * can shut down the corresponding server process. Cut it short first if
   * it would not fit (should never be needed).
   */
  if (strlen(label) >= M3_LONG_STRING)
 	label[M3_LONG_STRING-1] = 0;
  strcpy(m_out.umount_label, label);
  return(OK);
 }
 /*===========================================================================*
 *                              unmount                                      *
 *===========================================================================*/
 PUBLIC int unmount(
  dev_t dev,			/* block-special device */
  char *label			/* buffer to retrieve label, or NULL */
 )
 {
 /* Unmount the file system mounted from device 'dev'.
  *
  * Returns EINVAL when 'dev' is not mounted, EBUSY when vnodes on the device
  * are still in use, and OK otherwise. A failing req_unmount() is only
  * logged. When 'label' is not NULL, the label of the unmounted file system
  * is copied into it.
  */
  struct vnode *vp;
  struct vmnt *vmp_i = NULL, *vmp = NULL;
  int count, locks, r;
  /* Find vmnt that is to be unmounted */
  for (vmp_i = &vmnt[0]; vmp_i < &vmnt[NR_MNTS]; ++vmp_i) {
 	  if (vmp_i->m_dev == dev) {
 		  if(vmp) panic("device mounted more than once: %d", dev);
 		  vmp = vmp_i;
 	  }
  }
  /* Did we find the vmnt (i.e., was dev a mounted device)? */
  if(!vmp) return(EINVAL);
  lock_bsf();
  /* Take the lock outside of assert(): assert() expands to nothing when
   * NDEBUG is defined, and the lock must be acquired in every build.
   */
  r = lock_vmnt(vmp, VMNT_EXCL);
  assert(r == OK);
  /* See if the mounted device is busy.  Only 1 vnode using it should be
   * open -- the root vnode -- and that inode only 1 time. */
  locks = count = 0;
  for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++)
 	  if (vp->v_ref_count > 0 && vp->v_dev == dev) {
 		count += vp->v_ref_count;
 		if (is_vnode_locked(vp)) locks++;
 	  }
  if (count > 1 || locks > 1) {
 	unlock_vmnt(vmp);
 	unlock_bsf();
 	return(EBUSY);    /* can't umount a busy file system */
  }
  /* Tell FS to drop all inode references for root inode except 1.
   * NOTE(review): m_root_node is tested for NULL further down (PFS lacks a
   * root node) but is passed here unchecked -- confirm vnode_clean_refs()
   * copes with NULL.
   */
  vnode_clean_refs(vmp->m_root_node);
  if (vmp->m_mounted_on) {
 	put_vnode(vmp->m_mounted_on);
 	vmp->m_mounted_on = NULL;
  }
  vmp->m_comm.c_max_reqs = 1;	/* Force max concurrent reqs to just one, so
 				 * we won't send any messages after the
 				 * unmount request */
  /* Tell FS to unmount */
  if ((r = req_unmount(vmp->m_fs_e)) != OK)              /* Not recoverable. */
 	printf("VFS: ignoring failed umount attempt FS endpoint: %d (%d)\n",
 	       vmp->m_fs_e, r);
  if (is_nonedev(vmp->m_dev)) free_nonedev(vmp->m_dev);
  if (label != NULL) strcpy(label, vmp->m_label);
  if (vmp->m_root_node) {	/* PFS lacks a root node */
 	vmp->m_root_node->v_ref_count = 0;
 	vmp->m_root_node->v_fs_count = 0;
 	vmp->m_root_node->v_sdev = NO_DEV;
 	vmp->m_root_node = NULL;
  }
  /* Mark the vmnt slot as free. */
  vmp->m_dev = NO_DEV;
  vmp->m_fs_e = NONE;
  /* The root FS will handle block I/O requests for this device now. */
  update_bspec(dev, ROOT_FS_E, 1 /* send new driver endpoint */);
  unlock_vmnt(vmp);
  unlock_bsf();
  return(OK);
 }
 /*===========================================================================*
 *				unmount_all				     *
 *===========================================================================*/
 PUBLIC void unmount_all(void)
 {
 /* Unmount all filesystems.  File systems are mounted on other file systems,
  * so a single sweep over the vmnt table may leave busy ones behind. The
  * table is therefore swept NR_MNTS times; results of unmount() are ignored.
  * Afterwards the lock consistency checks are run.
  */
  int pass, slot;
  for (pass = 0; pass < NR_MNTS; pass++) {
 	for (slot = 0; slot < NR_MNTS; slot++) {
 		if (vmnt[slot].m_dev == NO_DEV) continue;
 		unmount(vmnt[slot].m_dev, NULL);
 	}
  }
  check_vnode_locks();
  check_vmnt_locks();
  check_filp_locks();
  check_bsf_lock();
 }
 /*===========================================================================*
 *                              name_to_dev                                  *
 *===========================================================================*/
 PRIVATE dev_t name_to_dev(int allow_mountpt, char path[PATH_MAX+1])
 {
 /* Translate the path name in 'path' into a device number. A block special
  * file yields the device it refers to. When 'allow_mountpt' is set, the
  * root node of a mounted file system yields the device of that file system.
  * Anything else yields NO_DEV with err_code set to ENOTBLK. When the path
  * cannot be resolved, NO_DEV is returned as well.
  */
  struct lookup resolve;
  struct vmnt *vmp;
  struct vnode *vp;
  dev_t result = NO_DEV;
  lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  /* Request lookup */
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(NO_DEV);
  if ((vp->v_mode & I_TYPE) == I_BLOCK_SPECIAL)
 	result = vp->v_sdev;
  else if (allow_mountpt && vp->v_vmnt->m_root_node == vp)
 	result = vp->v_dev;
  else
 	err_code = ENOTBLK;
  /* Release what the lookup handed us. */
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(result);
 }
 /*===========================================================================*
 *                              is_nonedev				     *
 *===========================================================================*/
 PRIVATE int is_nonedev(dev_t dev)
 {
 /* Tell whether 'dev' is a "none" pseudo device: its major number must be
  * NONE_MAJOR and its minor number must lie in the range 1..NR_NONEDEVS.
  */
  if (major(dev) != NONE_MAJOR) return(0);
  if (minor(dev) <= 0) return(0);
  return(minor(dev) <= NR_NONEDEVS);
 }
 /*===========================================================================*
 *                              find_free_nonedev			     *
 *===========================================================================*/
 PRIVATE dev_t find_free_nonedev(void)
 {
 /* Look for a "none" pseudo device whose bit in the nonedev bitmap is clear
  * and return its device number. The device is not allocated here. When all
  * are in use, set err_code to EMFILE and return NO_DEV.
  */
  int bit = 0;
  while (bit < NR_NONEDEVS) {
 	/* Bit 'bit' stands for minor number 'bit + 1'. */
 	if (!GET_BIT(nonedev, bit))
 		return makedev(NONE_MAJOR, bit + 1);
 	bit++;
  }
  err_code = EMFILE;
  return NO_DEV;
 }
--- a/servers/avfs/open.c
+++ b/servers/avfs/open.c
@ -0,0 +1,734 @@
 /* This file contains the procedures for creating, opening, closing, and
 * seeking on files.
 *
 * The entry points into this file are
 *   do_creat:	perform the CREAT system call
 *   do_open:	perform the OPEN system call
 *   do_mknod:	perform the MKNOD system call
 *   do_mkdir:	perform the MKDIR system call
 *   do_close:	perform the CLOSE system call
 *   do_lseek:  perform the LSEEK system call
 *   do_llseek: perform the LLSEEK system call
 */
 #include "fs.h"
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <string.h>
 #include <unistd.h>
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include <minix/u64.h>
 #include "file.h"
 #include "fproc.h"
 #include "dmap.h"
 #include "lock.h"
 #include "param.h"
 #include <dirent.h>
 #include <assert.h>
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 #include "path.h"
 /* Permission bits needed for each access mode. common_open() indexes this
  * table with (oflags & O_ACCMODE) and rejects a resulting 0 with EINVAL.
  */
 PRIVATE char mode_map[] = {R_BIT, W_BIT, R_BIT|W_BIT, 0};
 FORWARD _PROTOTYPE( int common_open, (char path[PATH_MAX+1], int oflags,
 				      mode_t omode)			);
 FORWARD _PROTOTYPE( struct vnode *new_node, (struct lookup *resolve,
 					     int oflags, mode_t bits)	);
 FORWARD _PROTOTYPE( int pipe_open, (struct vnode *vp, mode_t bits,
 				    int oflags)				);
 /*===========================================================================*
 *				do_creat				     *
 *===========================================================================*/
 PUBLIC int do_creat()
 {
 /* Perform the creat(name, mode) system call: fetch the path name and open
  * it through common_open() with O_WRONLY | O_CREAT | O_TRUNC.
  */
  char fullpath[PATH_MAX+1];
  const int creat_flags = O_WRONLY | O_CREAT | O_TRUNC;
  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
 	return(err_code);
  return(common_open(fullpath, creat_flags, (mode_t) m_in.mode));
 }
 /*===========================================================================*
 *				do_open					     *
 *===========================================================================*/
 PUBLIC int do_open()
 {
 /* Perform the open(name, flags,...) system call. The request carries three
  * parameters when O_CREAT is set and two otherwise, so the path name is
  * fetched from a different message layout in each case.
  */
  char fullpath[PATH_MAX+1];
  int create_mode = 0;		/* is really mode_t but this gives problems */
  int r;
  if (!(m_in.mode & O_CREAT)) {
 	r = fetch_name(m_in.name, m_in.name_length, M3, fullpath);
  } else {
 	create_mode = m_in.c_mode;
 	r = fetch_name(m_in.c_name, m_in.name1_length, M1, fullpath);
  }
  if (r != OK) return(err_code); /* name was bad */
  return(common_open(fullpath, m_in.mode, create_mode));
 }
 /*===========================================================================*
 *				common_open				     *
 *===========================================================================*/
 PRIVATE int common_open(char path[PATH_MAX+1], int oflags, mode_t omode)
 {
 /* Common code from do_creat and do_open.
  *
  * path:   path name of the file to open, or to create when O_CREAT is set
  * oflags: open flags; the access mode bits are mapped to R_BIT/W_BIT
  * omode:  mode for a newly created file; it is ANDed with ALL_MODES and
  *         the caller's fp_umask before use
  *
  * Returns the file descriptor (m_in.fd) on success, SUSPEND when the open
  * of a character device or FIFO suspended the caller, or an error code.
  */
  int b, r, exist = TRUE, major_dev;
  dev_t dev;
  mode_t bits;
  struct filp *filp, *filp2;
  struct vnode *vp;
  struct vmnt *vmp;
  struct dmap *dp;
  struct lookup resolve;
  /* Remap the bottom two bits of oflags. */
  bits = (mode_t) mode_map[oflags & O_ACCMODE];
  if (!bits) return(EINVAL);
  /* See if file descriptor and filp slots are available. */
  if ((r = get_fd(0, bits, &m_in.fd, &filp)) != OK) return(r);
  lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp);
  /* If O_CREATE is set, try to make the file. */
  if (oflags & O_CREAT) {
         omode = I_REGULAR | (omode & ALL_MODES & fp->fp_umask);
 	vp = new_node(&resolve, oflags, omode);
 	/* new_node reports its outcome through err_code. */
 	r = err_code;
 	if (r == OK) exist = FALSE;	/* We just created the file */
 	else if (r != EEXIST) {		/* other error */
 		if (vp) unlock_vnode(vp);
 		unlock_filp(filp);
 		return(r);
 	}
 	else exist = !(oflags & O_EXCL);/* file exists, if the O_EXCL
 					   flag is set this is an error */
  } else {
 	/* Scan path name */
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode_lock = VNODE_OPCL;
 	if ((vp = eat_path(&resolve, fp)) == NULL) {
 		unlock_filp(filp);
 		return(err_code);
 	}
 	if (vmp != NULL) unlock_vmnt(vmp);
  }
  /* Claim the file descriptor and filp slot and fill them in. */
  fp->fp_filp[m_in.fd] = filp;
  FD_SET(m_in.fd, &fp->fp_filp_inuse);
  filp->filp_count = 1;
  filp->filp_vno = vp;
  filp->filp_flags = oflags;
  /* Only do the normal open code if we didn't just create the file. */
  if (exist) {
 	/* Check protections. */
 	if ((r = forbidden(vp, bits)) == OK) {
 		/* Opening reg. files, directories, and special files differ */
 		switch (vp->v_mode & I_TYPE) {
 		   case I_REGULAR:
 			/* Truncate regular file if O_TRUNC. */
 			if (oflags & O_TRUNC) {
 				if ((r = forbidden(vp, W_BIT)) != OK)
 					break;
 				/* NOTE(review): the result of truncate_vnode
 				 * is not checked here -- confirm intended.
 				 */
 				truncate_vnode(vp, 0);
 			}
 			break;
 		   case I_DIRECTORY:
 			/* Directories may be read but not written. */
 			r = (bits & W_BIT ? EISDIR : OK);
 			break;
 		   case I_CHAR_SPECIAL:
 			/* Invoke the driver for special processing. */
 			dev = (dev_t) vp->v_sdev;
 			r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE));
 			if (r == SUSPEND) suspend(FP_BLOCKED_ON_DOPEN);
 			else vp = filp->filp_vno; /* Might be updated by
 						   * dev_open/clone_opcl */
 			break;
 		   case I_BLOCK_SPECIAL:
 			lock_bsf();
 			/* Invoke the driver for special processing. */
 			dev = (dev_t) vp->v_sdev;
 			r = dev_open(dev, who_e, bits | (oflags & ~O_ACCMODE));
 			if (r != OK) {
 				unlock_bsf();
 				break;
 			}
 			/* Check whether the device is mounted or not. If so,
 			 * then that FS is responsible for this device. Else
 			 * we default to ROOT_FS. */
 			vp->v_bfs_e = ROOT_FS_E; /* By default */
 			for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp)
 				if (vmp->m_dev == vp->v_sdev)
 					vp->v_bfs_e = vmp->m_fs_e;
 			/* Get the driver endpoint of the block spec device */
 			major_dev = major(vp->v_sdev);
 			if (major_dev < 0 || major_dev >= NR_DEVICES)
 				r = ENXIO;
 			else
 				dp = &dmap[major_dev];
 			/* dp is only set when the major number is in range;
 			 * in the other case r != OK, so dp is not read below.
 			 */
 			if (r != OK || dp->dmap_driver == NONE) {
 				printf("VFS: driver not found for device %d\n",
 					vp->v_sdev);
 				r = ENXIO;
 				unlock_bsf();
 				break;
 			}
 			/* Send the driver endpoint (even when known already)*/
 			if (vp->v_bfs_e != ROOT_FS_E) {
 				/* but only when it's the ROOT_FS */
 				unlock_bsf();
 				break;
 			}
 			if ((r = req_newdriver(vp->v_bfs_e, vp->v_sdev,
 					       dp->dmap_driver)) != OK) {
 				printf("VFS: error sending driver endpoint\n");
 				r = ENXIO;
 			}
 			unlock_bsf();
 			break;
 		   case I_NAMED_PIPE:
 			/* Create a mapped inode on PFS which handles reads
 			   and writes to this named pipe. */
 			tll_upgrade(&vp->v_lock);
 			r = map_vnode(vp, PFS_PROC_NR);
 			if (r == OK) {
 				vp->v_pipe = I_PIPE;
 				/* First opener: reset the pipe positions and
 				 * discard any data still in the pipe.
 				 */
 				if (vp->v_ref_count == 1) {
 					vp->v_pipe_rd_pos = 0;
 					vp->v_pipe_wr_pos = 0;
 					if (vp->v_size != 0)
 						r = truncate_vnode(vp, 0);
 				}
 				oflags |= O_APPEND;	/* force append mode */
 				filp->filp_flags = oflags;
 			}
 			if (r == OK) {
 				r = pipe_open(vp, bits, oflags);
 			}
 			if (r != ENXIO) {
 				/* See if someone else is doing a rd or wt on
 				 * the FIFO.  If so, use its filp entry so the
 				 * file position will be automatically shared.
 				 */
 				b = (bits & R_BIT ? R_BIT : W_BIT);
 				filp->filp_count = 0; /* don't find self */
 				if ((filp2 = find_filp(vp, b)) != NULL) {
 					/* Co-reader or writer found. Use it.*/
 					fp->fp_filp[m_in.fd] = filp2;
 					filp2->filp_count++;
 					filp2->filp_vno = vp;
 					filp2->filp_flags = oflags;
 					/* v_count was incremented after the
 					 * vnode has been found. i_count was
 					 * incremented incorrectly in FS, not
 					 * knowing that we were going to use an
 					 * existing filp entry.  Correct this
 					 * error.
 					 */
 					unlock_vnode(vp);
 					put_vnode(vp);
 				} else {
 					/* Nobody else found. Restore filp. */
 					filp->filp_count = 1;
 				}
 			}
 			break;
 		}
 	}
  }
  unlock_filp(filp);
  /* If error, release inode. */
  if (r != OK) {
 	/* On SUSPEND the descriptor and filp stay claimed. */
 	if (r != SUSPEND) {
 		fp->fp_filp[m_in.fd] = NULL;
 		FD_CLR(m_in.fd, &fp->fp_filp_inuse);
 		filp->filp_count = 0;
 		filp->filp_vno = NULL;
 		put_vnode(vp);
 	}
  } else {
 	r = m_in.fd;
  }
  return(r);
 }
 /*===========================================================================*
 *				new_node				     *
 *===========================================================================*/
 PRIVATE struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
 {
 /* Try to create a new inode and return a pointer to it. If the inode already
    exists, return a pointer to it as well, but set err_code accordingly.
    NULL is returned if the path cannot be resolved up to the last
    directory, or when the inode cannot be created due to permissions or
    otherwise.

    resolve: lookup descriptor; its l_path is the path to create and the
             resulting vnode is also stored through its l_vnode pointer
    oflags:  open flags; O_EXCL and O_TRUNC influence lookup and locking
    bits:    mode passed to req_create for the new inode

    The outcome is reported through err_code: OK when the inode was created,
    EEXIST when the last component already exists, or another error code. */
  struct vnode *dirp, *vp;
  struct vmnt *dir_vmp, *vp_vmp;
  int r;
  struct node_details res;
  struct lookup findnode;
  char *path;
  path = resolve->l_path;	/* For easy access */
  lookup_init(&findnode, path, resolve->l_flags, &dir_vmp, &dirp);
  findnode.l_vmnt_lock = VMNT_WRITE;
  findnode.l_vnode_lock = VNODE_WRITE; /* dir node */
  /* When O_CREAT and O_EXCL flags are set, the path may not be named by a
   * symbolic link. */
  if (oflags & O_EXCL) findnode.l_flags |= PATH_RET_SYMLINK;
  /* See if the path can be opened down to the last directory. */
  if ((dirp = last_dir(&findnode, fp)) == NULL) return(NULL);
  /* The final directory is accessible. Get final component of the path. */
  findnode.l_vmp = &vp_vmp;
  findnode.l_vnode = &vp;
  findnode.l_vnode_lock = (oflags & O_TRUNC) ? VNODE_WRITE : VNODE_OPCL;
  vp = advance(dirp, &findnode, fp);
  assert(vp_vmp == NULL);	/* Lookup to last dir should have yielded lock
 				 * on vmp or final component does not exist. */
  /* The combination of a symlink with absolute path followed by a dangling
   * symlink results in a new path that needs to be re-resolved entirely. */
  if (path[0] == '/') {
 printf("XXX: dangling symlink needs re-resolving\n");
 	unlock_vnode(dirp);
 	unlock_vmnt(dir_vmp);
 	put_vnode(dirp);
 	if (vp != NULL) {
 		unlock_vnode(vp);
 		put_vnode(vp);
 	}
 	return new_node(resolve, oflags, bits);
  }
  if (vp == NULL && err_code == ENOENT) {
 	/* Last path component does not exist. Make a new directory entry. */
 	if ((vp = get_free_vnode()) == NULL) {
 		/* Can't create new entry: out of vnodes. */
 		unlock_vnode(dirp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dirp);
 		return(NULL);
 	}
 	lock_vnode(vp, VNODE_OPCL);
 	if ((r = forbidden(dirp, W_BIT|X_BIT)) != OK ||
 	    (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,
 			    fp->fp_effgid, path, &res)) != OK ) {
 		/* Can't create inode either due to permissions or some other
 		 * problem. In case r is EEXIST, we might be dealing with a
 		 * dangling symlink.*/
 		if (r == EEXIST) {
 			struct vnode *slp, *old_wd;
 			/* Resolve path up to symlink */
 			findnode.l_flags = PATH_RET_SYMLINK;
 			findnode.l_vnode_lock = VNODE_READ;
 			findnode.l_vnode = &slp;
 			slp = advance(dirp, &findnode, fp);
 			if (slp != NULL) {
 				if (S_ISLNK(slp->v_mode)) {
 					/* Get contents of link */
 					r = req_rdlink(slp->v_fs_e,
 						       slp->v_inode_nr,
 						       VFS_PROC_NR,
 						       path,
 						       PATH_MAX, 0);
 					if (r < 0) {
 						/* Failed to read link */
 						unlock_vnode(slp);
 						unlock_vnode(dirp);
 						unlock_vmnt(dir_vmp);
 						put_vnode(slp);
 						put_vnode(dirp);
 						err_code = r;
 						return(NULL);
 					}
 					path[r] = '\0'; /* Terminate path */
 				}
 				unlock_vnode(slp);
 				put_vnode(slp);
 			}
 			/* Try to create the inode the dangling symlink was
 			 * pointing to. We have to use dirp as starting point
 			 * as there might be multiple successive symlinks
 			 * crossing multiple mountpoints. */
 			old_wd = fp->fp_wd; /* Save orig. working dirp */
 			fp->fp_wd = dirp;
 			/* NOTE(review): vp still refers to the free vnode that
 			 * was locked above when it is overwritten here. If the
 			 * recursive call returns NULL, the unlock_vnode(vp)
 			 * further down receives NULL -- confirm this is
 			 * handled.
 			 */
 			vp = new_node(resolve, oflags, bits);
 			fp->fp_wd = old_wd; /* Restore */
 			if (vp != NULL) {
 				unlock_vnode(dirp);
 				unlock_vmnt(dir_vmp);
 				put_vnode(dirp);
 				*(resolve->l_vnode) = vp;
 				return(vp);
 			}
 			r = err_code;
 		}
 		if (r == EEXIST)
 			err_code = EIO; /* Impossible, we have verified that
 					 * the last component doesn't exist and
 					 * is not a dangling symlink. */
 		else
 			err_code = r;
 		unlock_vnode(dirp);
 		unlock_vnode(vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dirp);
 		return(NULL);
 	}
 	/* Store results and mark vnode in use */
 	vp->v_fs_e = res.fs_e;
 	vp->v_inode_nr = res.inode_nr;
 	vp->v_mode = res.fmode;
 	vp->v_size = res.fsize;
 	vp->v_uid = res.uid;
 	vp->v_gid = res.gid;
 	vp->v_sdev = res.dev;
 	vp->v_vmnt = dirp->v_vmnt;
 	vp->v_dev = vp->v_vmnt->m_dev;
 	vp->v_fs_count = 1;
 	vp->v_ref_count = 1;
  } else {
 	/* Either last component exists, or there is some other problem. */
 	if (vp != NULL) {
 		r = EEXIST;	/* File exists or a symlink names a file while
 				 * O_EXCL is set. */
 	} else
 		r = err_code;	/* Other problem. */
  }
  /* Report the outcome to the caller through err_code. */
  err_code = r;
  /* When dirp equals vp, we shouldn't release the lock as a vp is locked only
   * once. Releasing the lock would cause the resulting vp not be locked and
   * cause mayhem later on. */
  if (dirp != vp) {
 	unlock_vnode(dirp);
  }
  unlock_vmnt(dir_vmp);
  put_vnode(dirp);
  *(resolve->l_vnode) = vp;
  return(vp);
 }
 /*===========================================================================*
 *				pipe_open				     *
 *===========================================================================*/
 PRIVATE int pipe_open(register struct vnode *vp, register mode_t bits,
 	register int oflags)
 {
 /*  Called from common_open for a named pipe. Opening for both reading and
  *  writing is refused with ENXIO. When the other end of the pipe is already
  *  open, processes blocked on the pipe are revived. When it is not, the
  *  caller is suspended, unless O_NONBLOCK is set: then a writer gets ENXIO
  *  and a reader gets OK.
  */
  int writing, other_end;
  vp->v_pipe = I_PIPE;
  if ((bits & R_BIT) && (bits & W_BIT)) return(ENXIO);
  writing = ((bits & W_BIT) != 0);
  other_end = writing ? R_BIT : W_BIT;
  /* Find the reader/writer at the other end of the pipe */
  if (find_filp(vp, other_end) != NULL) {
 	if (susp_count > 0) {	/* revive blocked processes */
 		release(vp, OPEN, susp_count);
 		release(vp, CREAT, susp_count);
 	}
 	return(OK);
  }
  /* Nobody at the other end. */
  if (!(oflags & O_NONBLOCK)) {
 	/* Let's wait for the other side to show up */
 	suspend(FP_BLOCKED_ON_POPEN);	/* suspend caller */
 	return(SUSPEND);
  }
  return(writing ? ENXIO : OK);
 }
 /*===========================================================================*
 *				do_mknod				     *
 *===========================================================================*/
 PUBLIC int do_mknod()
 {
 /* Perform the mknod(name, mode, addr) system call. */
  register mode_t bits, mode_bits;
  int r;
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  /* Only the super_user may make nodes other than fifos. */
  mode_bits = (mode_t) m_in.mk_mode;		/* mode of the inode */
  if (!super_user && (((mode_bits & I_TYPE) != I_NAMED_PIPE) &&
      ((mode_bits & I_TYPE) != I_UNIX_SOCKET))) {
 	return(EPERM);
  }
  bits = (mode_bits & I_TYPE) | (mode_bits & ALL_MODES & fp->fp_umask);
  /* Open directory that's going to hold the new node. */
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code);
  /* Make sure that the object is a directory */
  if ((vp->v_mode & I_TYPE) != I_DIRECTORY) {
 	r = ENOTDIR;
  } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) {
 	r = req_mknod(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid,
 		      fp->fp_effgid, bits, m_in.mk_z0);
  }
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_mkdir				     *
 *===========================================================================*/
 PUBLIC int do_mkdir()
 {
 /* Perform the mkdir(name, mode) system call. */
  mode_t bits;			/* mode bits for the new inode */
  int r;
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_READ;
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  bits = I_DIRECTORY | (m_in.mode & RWX_MODES & fp->fp_umask);
  if ((vp = last_dir(&resolve, fp)) == NULL) return(err_code);
  /* Make sure that the object is a directory */
  if ((vp->v_mode & I_TYPE) != I_DIRECTORY) {
 	r = ENOTDIR;
  } else if ((r = forbidden(vp, W_BIT|X_BIT)) == OK) {
 	r = req_mkdir(vp->v_fs_e, vp->v_inode_nr, fullpath, fp->fp_effuid,
 		      fp->fp_effgid, bits);
  }
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_lseek				     *
 *===========================================================================*/
 PUBLIC int do_lseek()
 {
 /* Perform the lseek(ls_fd, offset, whence) system call.
 *
 * The new position is the base selected by 'whence' (0, the current file
 * position, or the file size) plus the signed offset taken from the request.
 * If the high word of the 64-bit result is non-zero the call fails with
 * EINVAL; otherwise the low word is returned in m_out.reply_l1. Seeking on
 * a pipe yields ESPIPE, an unknown 'whence' yields EINVAL. When the position
 * actually changes, the file server is asked to inhibit read ahead and the
 * result of that request becomes the return value.
 */
  register struct filp *rfilp;
  int r = OK;
  long offset;
  u64_t pos, newpos;
  /* Check to see if the file descriptor is valid. */
  if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
  /* No lseek on pipes. */
  if (rfilp->filp_vno->v_pipe == I_PIPE) {
 	unlock_filp(rfilp);
 	return(ESPIPE);
  }
  /* The value of 'whence' determines the start position to use. */
  switch(m_in.whence) {
    case SEEK_SET: pos = cvu64(0);	break;
    case SEEK_CUR: pos = rfilp->filp_pos;	break;
    case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size);	break;
    default: unlock_filp(rfilp); return(EINVAL);
  }
  offset = m_in.offset_lo;
  if (offset >= 0)
 	newpos = add64ul(pos, offset);
  else {
 	/* Negate in unsigned arithmetic; '-offset' is undefined for the
 	 * most negative long.
 	 */
 	newpos = sub64ul(pos, 0UL - (unsigned long) offset);
  }
  /* Check for overflow. */
  if (ex64hi(newpos) != 0)
 	r = EINVAL;
  else {
 	/* insert the new position into the output message */
 	m_out.reply_l1 = ex64lo(newpos);
 	/* The comparison must see the OLD position: storing newpos first
 	 * would make it always equal and the request below unreachable.
 	 */
 	if (cmp64(newpos, rfilp->filp_pos) != 0) {
 		rfilp->filp_pos = newpos;
 		/* Inhibit read ahead request */
 		r = req_inhibread(rfilp->filp_vno->v_fs_e,
 				  rfilp->filp_vno->v_inode_nr);
 	}
  }
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				do_llseek				     *
 *===========================================================================*/
 PUBLIC int do_llseek()
 {
 /* Perform the llseek(ls_fd, offset, whence) system call.
 *
 * Works like do_lseek but with a 64-bit offset assembled from
 * m_in.offset_lo and m_in.offset_high. The new position is returned in
 * m_out.reply_l1 (low word) and m_out.reply_l2 (high word). A wrap-around
 * of the 64-bit addition is reported as EINVAL, a pipe as ESPIPE and an
 * unknown 'whence' as EINVAL. When the position actually changes, the file
 * server is asked to inhibit read ahead and the result of that request
 * becomes the return value.
 */
  register struct filp *rfilp;
  u64_t pos, newpos;
  int r = OK;
  /* Check to see if the file descriptor is valid. */
  if ( (rfilp = get_filp(m_in.ls_fd, VNODE_READ)) == NULL) return(err_code);
  /* No lseek on pipes. */
  if (rfilp->filp_vno->v_pipe == I_PIPE) {
 	unlock_filp(rfilp);
 	return(ESPIPE);
  }
  /* The value of 'whence' determines the start position to use. */
  switch(m_in.whence) {
    case SEEK_SET: pos = cvu64(0);	break;
    case SEEK_CUR: pos = rfilp->filp_pos;	break;
    case SEEK_END: pos = cvul64(rfilp->filp_vno->v_size);	break;
    default: unlock_filp(rfilp); return(EINVAL);
  }
  newpos = add64(pos, make64(m_in.offset_lo, m_in.offset_high));
  /* Check for overflow: a positive offset must not move the position
   * backwards and a negative offset must not move it forwards.
   */
  if (( (long) m_in.offset_high > 0) && cmp64(newpos, pos) < 0)
      r = EINVAL;
  else if (( (long) m_in.offset_high < 0) && cmp64(newpos, pos) > 0)
      r = EINVAL;
  else {
 	/* insert the new position into the output message */
 	m_out.reply_l1 = ex64lo(newpos);
 	m_out.reply_l2 = ex64hi(newpos);
 	/* The comparison must see the OLD position: storing newpos first
 	 * would make it always equal and the request below unreachable.
 	 */
 	if (cmp64(newpos, rfilp->filp_pos) != 0) {
 		rfilp->filp_pos = newpos;
 		/* Inhibit read ahead request */
 		r = req_inhibread(rfilp->filp_vno->v_fs_e,
 				  rfilp->filp_vno->v_inode_nr);
 	}
  }
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				do_close				     *
 *===========================================================================*/
 PUBLIC int do_close()
 {
 /* Perform the close(fd) system call for the calling process by handing the
 * descriptor from the request message to close_fd.
 */
  int fd_nr = m_in.fd;
  return(close_fd(fp, fd_nr));
 }
 /*===========================================================================*
 *				close_fd				     *
 *===========================================================================*/
 PUBLIC int close_fd(rfp, fd_nr)
 struct fproc *rfp;
 int fd_nr;
 {
 /* Close file descriptor 'fd_nr' of process 'rfp': close the filp, clear the
 * descriptor slot and its bookkeeping bits, and drop every file lock that
 * this process holds on the vnode. Returns err_code if the descriptor
 * cannot be obtained, OK otherwise.
 */
  struct filp *rfilp;
  struct vnode *vp;
  struct file_lock *flp;
  int i, released;
  /* First locate the vnode that belongs to the file descriptor. */
  rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL);
  if (rfilp == NULL) return(err_code);
  vp = rfilp->filp_vno;
  close_filp(rfilp);
  /* The descriptor slot is free again. */
  rfp->fp_filp[fd_nr] = NULL;
  FD_CLR(fd_nr, &rfp->fp_cloexec_set);
  FD_CLR(fd_nr, &rfp->fp_filp_inuse);
  /* Without any locks in the system there is nothing left to do. */
  if (nr_locks <= 0) return(OK);
  /* Release all locks this process holds on the file. */
  released = 0;
  for (i = 0; i < NR_LOCKS; i++) {
 	flp = &file_lock[i];
 	if (flp->lock_type == 0) continue;	/* slot not in use */
 	if (flp->lock_vnode != vp) continue;
 	if (flp->lock_pid != rfp->fp_pid) continue;
 	flp->lock_type = 0;
 	nr_locks--;
 	released++;
  }
  if (released > 0)
 	lock_revive();	/* one or more locks released */
  return(OK);
 }
 /*===========================================================================*
 *				close_reply				     *
 *===========================================================================*/
 PUBLIC void close_reply()
 {
 /* Intentionally empty: there is nothing to do here. */
  return;
 }
--- a/servers/avfs/param.h
+++ b/servers/avfs/param.h
@ -0,0 +1,63 @@
 #ifndef __VFS_PARAM_H__
 #define __VFS_PARAM_H__
 /* Descriptive aliases for the generic message fields (m1_i1, m2_l1, ...), so
 * that code can write e.g. m_in.fd or m_out.reply_l1 instead of the raw field
 * name. Several aliases share one underlying field (for instance 'fd' and
 * 'co_mode' are both m1_i1), so which alias is meaningful depends on the call
 * being handled.
 * NOTE: these are plain object-like macros; any identifier spelled like one
 * of them (e.g. 'mode', 'name', 'pid', 'nbytes') in a file that includes this
 * header is replaced as well.
 */
 /* The following names are synonyms for the variables in the input message. */
 #define addr	      m1_i3
 #define buffer	      m1_p1
 #define child_endpt      m1_i2
 #define co_mode	      m1_i1
 #define fd	      m1_i1
 #define fd2	      m1_i2
 #define group	      m1_i3
 #define ls_fd	      m2_i1
 #define mk_mode	      m1_i2
 #define mk_z0	      m1_i3
 #define mode	      m3_i2
 #define c_mode        m1_i3
 #define c_name        m1_p1
 #define name	      m3_p1
 #define flength       m2_l1
 #define name1	      m1_p1
 #define name2	      m1_p2
 #define	name_length   m3_i1
 #define name1_length  m1_i1
 #define name2_length  m1_i2
 #define nbytes        m1_i2
 #define owner	      m1_i2
 #define pathname      m3_ca1
 #define pid	      m1_i3
 #define ENDPT	      m1_i1
 #define offset_lo     m2_l1
 #define offset_high   m2_l2
 #define ctl_req       m4_l1
 #define mount_flags   m1_i3
 #define request       m1_i2
 #define sig	      m1_i2
 #define endpt1	      m1_i1
 #define fs_label      m1_p3
 #define umount_label  m3_ca1
 #define tp	      m2_l1
 #define utime_actime  m2_l1
 #define utime_modtime m2_l2
 #define utime_file    m2_p1
 #define utime_length  m2_i1
 #define utime_strlen  m2_i2
 #define whence	      m2_i2
 #define svrctl_req    m2_i1
 #define svrctl_argp   m2_p1
 #define info_what     m1_i1
 #define info_where    m1_p1
 #define md_label	m2_p1
 #define md_label_len	m2_l1
 #define md_major	m2_i1
 #define md_style	m2_i2
 #define md_flags	m2_i3
 /* The following names are synonyms for the variables in the output message. */
 #define reply_type    m_type
 #define reply_l1      m2_l1
 #define reply_l2      m2_l2
 #define reply_i1      m1_i1
 #define reply_i2      m1_i2
 #endif
--- a/servers/avfs/path.c
+++ b/servers/avfs/path.c
@ -0,0 +1,687 @@
 /* lookup() is the main routine that controls the path name lookup. It
 * handles mountpoints and symbolic links. The actual lookup requests
 * are sent through the req_lookup wrapper function.
 */
 #include "fs.h"
 #include <string.h>
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include <minix/keymap.h>
 #include <minix/const.h>
 #include <minix/endpoint.h>
 #include <unistd.h>
 #include <assert.h>
 #include <minix/vfsif.h>
 #include <sys/stat.h>
 #include <sys/un.h>
 #include <dirent.h>
 #include "threads.h"
 #include "vmnt.h"
 #include "vnode.h"
 #include "path.h"
 #include "fproc.h"
 #include "param.h"
 /* Set the following define to 1 if you really want to use the POSIX definition
 * (IEEE Std 1003.1, 2004) of pathname resolution. POSIX requires pathnames
 * with a trailing slash (and that do not entirely consist of slash characters)
 * to be treated as if a single dot is appended. This means that for example
 * mkdir("dir/", ...) and rmdir("dir/") will fail because the call tries to
 * create or remove the directory '.'. Historically, Unix systems just ignore
 * trailing slashes.
 */
 #define DO_POSIX_PATHNAME_RES	0
 FORWARD _PROTOTYPE( int lookup, (struct vnode *dirp, struct lookup *resolve,
 				 node_details_t *node, struct fproc *rfp));
 FORWARD _PROTOTYPE( int check_perms, (endpoint_t ep, cp_grant_id_t io_gr,
 				      size_t pathlen)			);
 /*===========================================================================*
 *				advance					     *
 *===========================================================================*/
 PUBLIC struct vnode *advance(dirp, resolve, rfp)
 struct vnode *dirp;
 struct lookup *resolve;
 struct fproc *rfp;
 {
 /* Resolve the path name in 'resolve', starting at directory 'dirp', to a
 * vnode on behalf of process 'rfp'.
 *
 * On success the vnode is returned after dup_vnode has been applied to it.
 * Unless locking the vnode reported EBUSY, the vnode is also stored in
 * *resolve->l_vnode and its lock is brought to the type requested in
 * resolve->l_vnode_lock. If no free vnode is available or the lookup fails,
 * NULL is returned; in the latter case err_code holds the lookup result.
 * Both lock types in 'resolve' must have been set by the caller (asserted).
 */
  int r;
  int do_downgrade = 1;
  struct vnode *new_vp, *vp;
  struct vmnt *vmp;
  struct node_details res = {0,0,0,0,0,0,0};
  tll_access_t initial_locktype;
  assert(dirp);
  assert(resolve->l_vnode_lock != TLL_NONE);
  assert(resolve->l_vmnt_lock != TLL_NONE);
  /* A requested read lock is first taken as VNODE_OPCL and downgraded once
   * the vnode has been set up (see the end of this function).
   */
  if (resolve->l_vnode_lock == VNODE_READ)
 	initial_locktype = VNODE_OPCL;
  else
 	initial_locktype = resolve->l_vnode_lock;
  /* Get a free vnode and lock it */
  if ((new_vp = get_free_vnode()) == NULL) return(NULL);
  lock_vnode(new_vp, initial_locktype);
  /* Lookup vnode belonging to the file. */
  if ((r = lookup(dirp, resolve, &res, rfp)) != OK) {
 	err_code = r;
 	unlock_vnode(new_vp);
 	return(NULL);
  }
  /* Check whether we already have a vnode for that file */
  if ((vp = find_vnode(res.fs_e, res.inode_nr)) != NULL) {
 	unlock_vnode(new_vp);	/* Don't need this anymore */
 	/* EBUSY means no new lock was obtained here, so there is nothing to
 	 * downgrade or to report through resolve->l_vnode later on.
 	 */
 	do_downgrade = (lock_vnode(vp, initial_locktype) != EBUSY);
 	/* Unfortunately, by the time we get the lock, another thread might've
 	 * gotten rid of the vnode (e.g., find_vnode found the vnode while a
 	 * req_putnode was being processed). */
 	if (vp->v_ref_count == 0) { /* vnode vanished! */
 		/* As the lookup before increased the usage counters in the FS,
 		 * we can simply set the usage counters to 1 and proceed as
 		 * normal, because the putnode resulted in a use count of 1 in
 		 * the FS. Other data is still valid, because the vnode was
 		 * marked as pending lock, so get_free_vnode hasn't
 		 * reinitialized the vnode yet. */
 		vp->v_fs_count = 1;
 		if (vp->v_mapfs_e != NONE) vp->v_mapfs_count = 1;
 	} else {
 		vp->v_fs_count++;	/* We got a reference from the FS */
 	}
  } else {
 	/* Vnode not found, fill in the free vnode's fields */
 	new_vp->v_fs_e = res.fs_e;
 	new_vp->v_inode_nr = res.inode_nr;
 	new_vp->v_mode = res.fmode;
 	new_vp->v_size = res.fsize;
 	new_vp->v_uid = res.uid;
 	new_vp->v_gid = res.gid;
 	new_vp->v_sdev = res.dev;
 	if( (vmp = find_vmnt(new_vp->v_fs_e)) == NULL)
 		  panic("advance: vmnt not found");
 	new_vp->v_vmnt = vmp;
 	new_vp->v_dev = vmp->m_dev;
 	new_vp->v_fs_count = 1;
 	vp = new_vp;
  }
  dup_vnode(vp);
  if (do_downgrade) {
 	/* Only downgrade a lock if we managed to lock it in the first place */
 	*(resolve->l_vnode) = vp;
 	if (initial_locktype != resolve->l_vnode_lock)
 		tll_downgrade(&vp->v_lock);
 #if LOCK_DEBUG
 	/* NOTE(review): this counts on the global 'fp', not on 'rfp' --
 	 * confirm that this is intended. */
 	if (resolve->l_vnode_lock == VNODE_READ)
 		fp->fp_vp_rdlocks++;
 #endif
  }
  return(vp);
 }
 /*===========================================================================*
 *				eat_path				     *
 *===========================================================================*/
 PUBLIC struct vnode *eat_path(resolve, rfp)
 struct lookup *resolve;
 struct fproc *rfp;
 {
 /* Resolve the complete path in 'resolve' to a vnode. Absolute paths start
 * at the root directory of process 'rfp', all others at its working
 * directory; advance does the actual work.
 */
  struct vnode *start_dir;
  if (resolve->l_path[0] == '/')
 	start_dir = rfp->fp_rd;
  else
 	start_dir = rfp->fp_wd;
  return(advance(start_dir, resolve, rfp));
 }
 /*===========================================================================*
 *				last_dir				     *
 *===========================================================================*/
 PUBLIC struct vnode *last_dir(resolve, rfp)
 struct lookup *resolve;
 struct fproc *rfp;
 {
 /* Parse a path, as far as the last directory, fetch the vnode
 * for the last directory into the vnode table, and return a pointer to the
 * vnode. In addition, the final component of the path is left in
 * resolve->l_path (the buffer is modified in place). If the last directory
 * can't be opened, return NULL and the reason for failure in 'err_code'. We
 * can't parse component by component as that would be too expensive.
 * Alternatively, we cut off the last component of the path, and parse the
 * path up to the penultimate component.
 */
  size_t len;
  char *cp;
  char dir_entry[PATH_MAX+1];
  struct vnode *start_dir, *res;
  /* Is the path absolute or relative? Initialize 'start_dir' accordingly. */
  start_dir = (resolve->l_path[0] == '/' ? rfp->fp_rd : rfp->fp_wd);
  len = strlen(resolve->l_path);
  /* If path is empty, return ENOENT. */
  if (len == 0)	{
 	err_code = ENOENT;
 	return(NULL);
  }
 #if !DO_POSIX_PATHNAME_RES
  /* Remove trailing slashes (but keep a path that is just "/") */
  while (len > 1 && resolve->l_path[len-1] == '/') {
 	  len--;
 	  resolve->l_path[len]= '\0';
  }
 #endif
  cp = strrchr(resolve->l_path, '/');
  if (cp == NULL) {
 	/* Just one entry in the current working directory. No lookup is
 	 * needed: lock the start directory and its vmnt and return it. The
 	 * vmnt is reported through resolve->l_vmp only if locking it did
 	 * not return EBUSY. */
 	struct vmnt *vmp;
 	vmp = find_vmnt(start_dir->v_fs_e);
 	if (lock_vmnt(vmp, resolve->l_vmnt_lock) != EBUSY)
 		*resolve->l_vmp = vmp;
 	lock_vnode(start_dir, resolve->l_vnode_lock);
 	*resolve->l_vnode = start_dir;
 	dup_vnode(start_dir);
 	return(start_dir);
  } else if (cp[1] == '\0') {
 	/* Path ends in a slash. The directory entry is '.' */
 	strcpy(dir_entry, ".");
  } else {
 	/* A path name for the directory and a directory entry. Save the
 	 * entry and cut it off the path right after the last slash. */
 	strcpy(dir_entry, cp+1);
 	cp[1] = '\0';
  }
  /* Remove trailing slashes (never the very first character of the path) */
  while(cp > resolve->l_path && cp[0] == '/') {
 	cp[0]= '\0';
 	cp--;
  }
  /* Resolve what is left of the path: the directory part */
  resolve->l_flags = PATH_NOFLAGS;
  res = advance(start_dir, resolve, rfp);
  if (res == NULL) return(NULL);
  /* Copy the directory entry back into the caller's path buffer */
  strncpy(resolve->l_path, dir_entry, PATH_MAX);
  return(res);
 }
 /*===========================================================================*
 *				lookup					     *
 *===========================================================================*/
 PRIVATE int lookup(start_node, resolve, result_node, rfp)
 struct vnode *start_node;
 struct lookup *resolve;
 node_details_t *result_node;
 struct fproc *rfp;
 {
 /* Resolve a path name relative to start_node.
 *
 * The path in resolve->l_path is handed to the file server of start_node
 * through req_lookup. As long as the answer is EENTERMOUNT, ELEAVEMOUNT or
 * ESYMLINK, the part of the path that was already processed is cut off and
 * the request is repeated from the new starting directory. On success the
 * details of the node found are copied into 'result_node'. The vmnt locked
 * for the final request is reported through *resolve->l_vmp; it is NULL when
 * locking returned EBUSY and on every error return from the request loop.
 * NOTE: resolve->l_path is modified in place.
 */
  int r, symloop;
  endpoint_t fs_e;
  size_t path_off, path_left_len;
  ino_t dir_ino, root_ino;
  uid_t uid;
  gid_t gid;
  struct vnode *dir_vp;
  struct vmnt *vmp, *vmpres;
  struct lookup_res res;
  assert(resolve->l_vmp);
  assert(resolve->l_vnode);
  *(resolve->l_vmp) = vmpres = NULL; /* No vmnt found nor locked yet */
  /* Empty (start) path? */
  if (resolve->l_path[0] == '\0') {
 	result_node->inode_nr = 0;
 	return(ENOENT);
  }
  if (!rfp->fp_rd || !rfp->fp_wd) {
 	printf("VFS: lookup %d: no rd/wd\n", rfp->fp_endpoint);
 	return(ENOENT);
  }
  fs_e = start_node->v_fs_e;
  dir_ino = start_node->v_inode_nr;
  vmpres = find_vmnt(fs_e);
  /* Is the process' root directory on the same partition?,
   * if so, set the chroot directory too. */
  if (rfp->fp_rd->v_dev == rfp->fp_wd->v_dev)
 	root_ino = rfp->fp_rd->v_inode_nr;
  else
 	root_ino = 0;
  /* Set user and group ids according to the system call */
  uid = (call_nr == ACCESS ? rfp->fp_realuid : rfp->fp_effuid);
  gid = (call_nr == ACCESS ? rfp->fp_realgid : rfp->fp_effgid);
  symloop = 0;	/* Number of symlinks seen so far */
  /* Lock vmnt */
  if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
 	if (r == EBUSY) /* vmnt already locked */
 		vmpres = NULL;
  }
  *(resolve->l_vmp) = vmpres;
  /* Issue the request */
  r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
  if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
 	if (vmpres) unlock_vmnt(vmpres);
 	*(resolve->l_vmp) = NULL;
 	return(r); /* i.e., an error occurred */
  }
  /* While the response is related to mount control set the
   * new requests respectively */
  while (r == EENTERMOUNT || r == ELEAVEMOUNT || r == ESYMLINK) {
 	/* Update the path to reflect what's left to be parsed. */
 	path_off = res.char_processed;
 	path_left_len = strlen(&resolve->l_path[path_off]);
 	memmove(resolve->l_path, &resolve->l_path[path_off], path_left_len);
 	resolve->l_path[path_left_len] = '\0'; /* terminate string */
 	/* Update the current value of the symloop counter */
 	symloop += res.symloop;
 	if (symloop > SYMLOOP_MAX) {
 		if (vmpres) unlock_vmnt(vmpres);
 		*(resolve->l_vmp) = NULL;
 		return(ELOOP);
 	}
 	/* Symlink encountered with absolute path */
 	if (r == ESYMLINK) {
 		dir_vp = rfp->fp_rd;
 		vmp = NULL;
 	} else if (r == EENTERMOUNT) {
 		/* Entering a new partition */
 		dir_vp = 0;
 		/* Start node is now the mounted partition's root node */
 		for (vmp = &vmnt[0]; vmp != &vmnt[NR_MNTS]; ++vmp) {
 			if (vmp->m_dev != NO_DEV && vmp->m_mounted_on) {
 			   if (vmp->m_mounted_on->v_inode_nr == res.inode_nr &&
 			       vmp->m_mounted_on->v_fs_e == res.fs_e) {
 				dir_vp = vmp->m_root_node;
 				break;
 			   }
 			}
 		}
 		assert(dir_vp);
 	} else {
 		/* Climbing up mount */
 		/* Find the vmnt that represents the partition on
 		 * which we "climb up". */
 		if ((vmp = find_vmnt(res.fs_e)) == NULL) {
 			panic("VFS lookup: can't find parent vmnt");
 		}
 		/* Make sure that the child FS does not feed a bogus path
 		 * to the parent FS. That is, when we climb up the tree, we
 		 * must've encountered ".." in the path, and that is exactly
 		 * what we're going to feed to the parent */
 		if(strncmp(resolve->l_path, "..", 2) != 0 ||
 		   (resolve->l_path[2] != '\0' && resolve->l_path[2] != '/')) {
 			printf("VFS: bogus path: %s\n", resolve->l_path);
 			if (vmpres) unlock_vmnt(vmpres);
 			*(resolve->l_vmp) = NULL;
 			return(ENOENT);
 		}
 		/* Start node is the vnode on which the partition is
 		 * mounted */
 		dir_vp = vmp->m_mounted_on;
 	}
 	/* Set the starting directories inode number and FS endpoint */
 	fs_e = dir_vp->v_fs_e;
 	dir_ino = dir_vp->v_inode_nr;
 	/* Is the process' root directory on the same partition?,
 	 * if so, set the chroot directory too. */
 	if (dir_vp->v_dev == rfp->fp_rd->v_dev)
 		root_ino = rfp->fp_rd->v_inode_nr;
 	else
 		root_ino = 0;
 	/* Unlock a previously locked vmnt if locked and lock new vmnt */
 	if (vmpres) unlock_vmnt(vmpres);
 	vmpres = find_vmnt(fs_e);
 	if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
 		if (r == EBUSY)
 			vmpres = NULL;	/* Already locked */
 	}
 	*(resolve->l_vmp) = vmpres;
 	r = req_lookup(fs_e, dir_ino, root_ino, uid, gid, resolve, &res, rfp);
 	if (r != OK && r != EENTERMOUNT && r != ELEAVEMOUNT && r != ESYMLINK) {
 		if (vmpres) unlock_vmnt(vmpres);
 		*(resolve->l_vmp) = NULL;
 		return(r);
 	}
  }
  /* Fill in response fields */
  result_node->inode_nr = res.inode_nr;
  result_node->fmode = res.fmode;
  result_node->fsize = res.fsize;
  result_node->dev = res.dev;
  result_node->fs_e = res.fs_e;
  result_node->uid = res.uid;
  result_node->gid = res.gid;
  return(r);
 }
 /*===========================================================================*
 *				lookup_init				     *
 *===========================================================================*/
 PUBLIC void lookup_init(resolve, path, flags, vmp, vp)
 struct lookup *resolve;
 char *path;
 int flags;
 struct vmnt **vmp;
 struct vnode **vp;
 {
 /* Prepare 'resolve' for a path lookup: remember the path buffer, the flags
 * and where the locked vmnt and vnode are to be reported, and clear both
 * result slots. The lock types start out as TLL_NONE and have to be set by
 * the caller afterwards.
 */
  assert(vmp != NULL);
  assert(vp != NULL);
  /* Nothing has been found or locked yet. */
  *vp = NULL;
  *vmp = NULL;
  /* What to look up and how. */
  resolve->l_flags = flags;
  resolve->l_path = path;
  /* Where the results go. */
  resolve->l_vnode = vp;
  resolve->l_vmp = vmp;
  /* No lock types chosen yet. */
  resolve->l_vnode_lock = TLL_NONE;
  resolve->l_vmnt_lock = TLL_NONE;
 }
 /*===========================================================================*
 *				get_name				     *
 *===========================================================================*/
 PUBLIC int get_name(dirp, entry, ename)
 struct vnode *dirp;
 struct vnode *entry;
 char ename[NAME_MAX + 1];
 {
 /* Search directory 'dirp' for an entry whose inode number equals that of
 * 'entry' and copy its name into 'ename'. Returns OK when found, EBADF if
 * 'dirp' is not a directory, ENOENT when the directory has been read to the
 * end without a match, or the error reported by req_getdents.
 */
  char buf[(sizeof(struct dirent) + NAME_MAX) * 8];
  u64_t pos, new_pos;
  struct dirent *dp;
  int got, off;
  if ((dirp->v_mode & I_TYPE) != I_DIRECTORY) return(EBADF);
  pos = make64(0, 0);
  for (;;) {
 	/* Fetch the next batch of directory entries. */
 	got = req_getdents(dirp->v_fs_e, dirp->v_inode_nr, pos, buf,
 			   sizeof(buf), &new_pos, 1);
 	if (got < 0) return(got);	/* error */
 	if (got == 0) return(ENOENT);	/* end of entries, nothing matched */
 	/* Walk the entries in this batch; 'off' is the number of bytes
 	 * consumed so far out of the 'got' bytes returned. */
 	for (off = 0; off < got; off += dp->d_reclen) {
 		dp = (struct dirent *) (buf + off);
 		if (entry->v_inode_nr == dp->d_ino) {
 			/* found the entry we were looking for */
 			strncpy(ename, dp->d_name, NAME_MAX);
 			ename[NAME_MAX] = '\0';
 			return(OK);
 		}
 	}
 	pos = new_pos;
  }
 }
 /*===========================================================================*
 *				canonical_path				     *
 *===========================================================================*/
 PUBLIC int canonical_path(orig_path, canon_path, rfp)
 char *orig_path;
 char canon_path[PATH_MAX+1]; /* should have length PATH_MAX+1 */
 struct fproc *rfp;
 {
 /* Compute an absolute path for 'orig_path' as seen by process 'rfp' and
 * store it in 'canon_path'.
 *
 * First the last component is resolved: as long as it is a symbolic link
 * (rdlink_direct returns a positive value) its target is followed, at most
 * SYMLOOP_MAX times. Then the directory holding the file is walked upwards
 * through "..", crossing mount points, until the root directory of the
 * process is reached; the name of each directory passed is put in front of
 * the result.
 *
 * Returns OK on success, ELOOP on too many symbolic links, ENOMEM when the
 * result would not fit in PATH_MAX characters while climbing, ENAMETOOLONG
 * when there is no room for the leading slash, ENOENT when a directory name
 * cannot be found in its parent, or err_code of a failed lookup.
 */
  size_t len, comp_len;
  int r, symloop = 0;
  struct vnode *dir_vp, *parent_dir, *mount_vp;
  struct vmnt *dir_vmp, *parent_vmp;
  char component[NAME_MAX+1];
  char link_path[PATH_MAX+1];
  char temp_path[PATH_MAX+1];
  struct lookup resolve;
  dir_vp = NULL;
  dir_vmp = NULL;
  /* strncpy does not terminate the destination when the source is too
   * long, so do that explicitly. */
  strncpy(temp_path, orig_path, PATH_MAX);
  temp_path[PATH_MAX] = '\0';
  do {
 	if (dir_vp) {
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dir_vp);
 	}
 	/* Resolve to the last directory holding the file */
 	lookup_init(&resolve, temp_path, PATH_NOFLAGS, &dir_vmp, &dir_vp);
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode_lock = VNODE_READ;
 	if ((dir_vp = last_dir(&resolve, rfp)) == NULL) return(err_code);
 	/* dir_vp points to dir and resolve path now contains only the
 	 * filename.
 	 */
 	strcpy(canon_path, resolve.l_path); /* Store file name */
 	/* check if the file is a symlink, if so resolve it */
 	r = rdlink_direct(canon_path, link_path, rfp);
 	if (r <= 0) {
 		strcpy(temp_path, canon_path);
 		break;
 	}
 	/* encountered a symlink -- loop again */
 	strcpy(temp_path, link_path);
 	symloop++;
  } while (symloop < SYMLOOP_MAX);
  if (symloop >= SYMLOOP_MAX) {
 	if (dir_vp) {
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dir_vp);
 	}
 	return(ELOOP);
  }
  /* From here on 'len' is the length of the string in canon_path, so that
   * the file name itself is counted against PATH_MAX as well. */
  len = strlen(canon_path);
  while(dir_vp != rfp->fp_rd) {
 	strcpy(temp_path, "..");
 	/* check if we're at the root node of the file system */
 	if (dir_vp->v_vmnt->m_root_node == dir_vp) {
 		/* Pick up the mount point before the reference to dir_vp is
 		 * given up. */
 		mount_vp = dir_vp->v_vmnt->m_mounted_on;
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dir_vp);
 		dir_vp = mount_vp;
 		dir_vmp = dir_vp->v_vmnt;
 		/* The lock calls must not live inside assert(): they would
 		 * disappear when assertions are compiled out. */
 		if (lock_vmnt(dir_vmp, VMNT_READ) != OK)
 			panic("canonical_path: failed to lock vmnt");
 		if (lock_vnode(dir_vp, VNODE_READ) != OK)
 			panic("canonical_path: failed to lock vnode");
 		dup_vnode(dir_vp);
 	}
 	lookup_init(&resolve, temp_path, PATH_NOFLAGS, &parent_vmp,
 		    &parent_dir);
 	resolve.l_vmnt_lock = VMNT_READ;
 	resolve.l_vnode_lock = VNODE_READ;
 	if ((parent_dir = advance(dir_vp, &resolve, rfp)) == NULL) {
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(dir_vp);
 		return(err_code);
 	}
 	/* now we have to retrieve the name of the parent directory */
 	if (get_name(parent_dir, dir_vp, component) != OK) {
 		unlock_vnode(parent_dir);
 		unlock_vmnt(parent_vmp);
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(parent_dir);
 		put_vnode(dir_vp);
 		return(ENOENT);
 	}
 	comp_len = strlen(component);
 	if (len + comp_len + 1 > PATH_MAX) {
 		/* adding the component to canon_path would exceed PATH_MAX */
 		unlock_vnode(parent_dir);
 		unlock_vmnt(parent_vmp);
 		unlock_vnode(dir_vp);
 		unlock_vmnt(dir_vmp);
 		put_vnode(parent_dir);
 		put_vnode(dir_vp);
 		return(ENOMEM);
 	}
 	/* store result of component in canon_path */
 	/* first make space by moving the contents of canon_path to
 	 * the right. Move len + 1 bytes to include the terminating '\0'.
 	 */
 	memmove(canon_path + comp_len + 1, canon_path, len + 1);
 	/* Copy component into canon_path */
 	memmove(canon_path, component, comp_len);
 	/* Put slash into place */
 	canon_path[comp_len] = '/';
 	len += comp_len + 1;
 	/* Store parent_dir result, and continue the loop once more. The
 	 * vmnt locked for the parent travels along with its vnode. */
 	unlock_vnode(dir_vp);
 	unlock_vmnt(dir_vmp);
 	put_vnode(dir_vp);
 	dir_vp = parent_dir;
 	dir_vmp = parent_vmp;
  }
  /* Release the directory we ended up at, together with its own vmnt. */
  unlock_vnode(dir_vp);
  unlock_vmnt(dir_vmp);
  put_vnode(dir_vp);
  /* add the leading slash; the terminating '\0' is moved along */
  if (len >= PATH_MAX) return(ENAMETOOLONG);
  memmove(canon_path+1, canon_path, len + 1);
  canon_path[0] = '/';
  return(OK);
 }
 /*===========================================================================*
 *				check_perms				     *
 *===========================================================================*/
 PRIVATE int check_perms(ep, io_gr, pathlen)
 endpoint_t ep;
 cp_grant_id_t io_gr;
 size_t pathlen;
 {
 /* Check whether the process with endpoint 'ep' has read and write access to
 * the path that PFS made available through grant 'io_gr' ('pathlen' bytes).
 * As a side effect the canonical form of the path is copied back to PFS
 * through the same grant. Returns the result of the permission check, or an
 * error code if one of the steps leading up to it fails.
 */
  char orig_path[PATH_MAX+1];
  char canon_path[PATH_MAX+1];
  char temp_path[PATH_MAX+1];
  struct lookup resolve;
  struct fproc *rfp;
  struct vnode *vp;
  struct vmnt *vmp;
  size_t canon_len;
  int r, slot;
  if (isokendpt(ep, &slot) != OK) return(EINVAL);
  if (!(pathlen >= UNIX_PATH_MAX && pathlen <= PATH_MAX)) return(EINVAL);
  rfp = &fproc[slot];
  memset(canon_path, '\0', sizeof(canon_path));
  /* Fetch the path from PFS. */
  r = sys_safecopyfrom(PFS_PROC_NR, io_gr, (vir_bytes) 0,
 				(vir_bytes) temp_path, pathlen, D);
  if (r != OK) return(r);
  temp_path[pathlen] = '\0';
  /* save path from pfs before permissions checking modifies it */
  memcpy(orig_path, temp_path, sizeof(orig_path));
  /* get the canonical path to the socket file */
  r = canonical_path(orig_path, canon_path, rfp);
  if (r != OK) return(r);
  canon_len = strlen(canon_path);
  if (canon_len >= pathlen) return(ENAMETOOLONG);
  /* copy canon_path back to PFS */
  r = sys_safecopyto(PFS_PROC_NR, (cp_grant_id_t) io_gr, (vir_bytes) 0,
 				(vir_bytes) canon_path, canon_len + 1, D);
  if (r != OK) return(r);
  /* reload the original path for permissions checking */
  memcpy(temp_path, orig_path, sizeof(temp_path));
  lookup_init(&resolve, temp_path, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  vp = eat_path(&resolve, rfp);
  if (vp == NULL) return(err_code);
  /* check permissions */
  r = forbidden(vp, (R_BIT | W_BIT));
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_check_perms				     *
 *===========================================================================*/
 PUBLIC int do_check_perms(void)
 {
  return check_perms(m_in.USER_ENDPT, (cp_grant_id_t) m_in.IO_GRANT,
 		     (size_t) m_in.COUNT);
 }
--- a/servers/avfs/path.h
+++ b/servers/avfs/path.h
@ -0,0 +1,13 @@
 #ifndef __VFS_PATH_H__
 #define __VFS_PATH_H__
 /* Parameters and results of one path lookup. A caller sets it up with
 * lookup_init(), which stores the path buffer, the flags and the two result
 * pointers and resets both lock types to TLL_NONE; the caller then chooses
 * the lock types before starting the lookup. Note that the lookup code
 * modifies the buffer behind l_path in place.
 */
 struct lookup {
  char *l_path;			/* Path to lookup */
  int l_flags;			/* VFS/FS flags (see <minix/vfsif.h>) */
  tll_access_t l_vmnt_lock;	/* Lock to obtain on vmnt */
  tll_access_t l_vnode_lock;	/* Lock to obtain on vnode */
  struct vmnt **l_vmp;		/* vmnt object that was locked */
  struct vnode **l_vnode;	/* vnode object that was locked */
 };
 #endif
--- a/servers/avfs/pipe.c
+++ b/servers/avfs/pipe.c
@ -0,0 +1,637 @@
 /* This file deals with the suspension and revival of processes.  A process can
 * be suspended because it wants to read or write from a pipe and can't, or
 * because it wants to read or write from a special file and can't.  When a
 * process can't continue it is suspended, and revived later when it is able
 * to continue.
 *
 * The entry points into this file are
 *   do_pipe:	  perform the PIPE system call
 *   pipe_check:  check to see that a read or write on a pipe is feasible now
 *   suspend:	  suspend a process that cannot do a requested read or write
 *   release:	  check to see if a suspended process can be released and do
 *                it
 *   revive:	  mark a suspended process as able to run again
 *   unsuspend_by_endpt: revive all processes blocking on a given process
 *   unpause:	  a signal has been sent to a process; see if it is suspended
 */
 #include "fs.h"
 #include <fcntl.h>
 #include <signal.h>
 #include <assert.h>
 #include <minix/callnr.h>
 #include <minix/endpoint.h>
 #include <minix/com.h>
 #include <minix/u64.h>
 #include <sys/select.h>
 #include <sys/time.h>
 #include "file.h"
 #include "fproc.h"
 #include "dmap.h"
 #include "param.h"
 #include "select.h"
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 /*===========================================================================*
 *				do_pipe					     *
 *===========================================================================*/
 PUBLIC int do_pipe()
 {
 /* Perform the pipe(fil_des) system call.
  *
  * A free vnode is locked and the PFS vmnt is write-locked, two file
  * descriptors with their filp slots are claimed for the caller (read end
  * first, then write end), and PFS is asked for a new inode. The descriptors
  * are reported back in m_out.reply_i1 and m_out.reply_i2. On failure the
  * descriptor slots and locks taken so far are given back and the error code
  * is returned.
  */
  register struct fproc *rfp;
  int r;
  struct filp *fil_ptr0, *fil_ptr1;
  int fil_des[2];		/* reply goes here */
  struct vnode *vp;
  struct vmnt *vmp;
  struct node_details res;
  /* See if a free vnode is available */
  if ((vp = get_free_vnode()) == NULL) return(err_code);
  lock_vnode(vp, VNODE_OPCL);
  /* Get a lock on PFS */
  if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
  lock_vmnt(vmp, VMNT_WRITE);
  /* Acquire two file descriptors. */
  rfp = fp;
  if ((r = get_fd(0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) {
 	/* Nothing claimed yet; only the two locks have to be dropped. */
 	unlock_vnode(vp);
 	unlock_vmnt(vmp);
 	return(r);
  }
  /* Claim the read end right away so the second get_fd() cannot hand out the
   * same descriptor/filp slot again (presumably; confirm in get_fd()).
   */
  rfp->fp_filp[fil_des[0]] = fil_ptr0;
  FD_SET(fil_des[0], &rfp->fp_filp_inuse);
  fil_ptr0->filp_count = 1;		/* mark filp in use */
  if ((r = get_fd(0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) {
 	/* Undo the claim on the read end. */
 	rfp->fp_filp[fil_des[0]] = NULL;
 	FD_CLR(fil_des[0], &rfp->fp_filp_inuse);
 	fil_ptr0->filp_count = 0;	/* mark filp free */
 	unlock_filp(fil_ptr0);
 	unlock_vnode(vp);
 	unlock_vmnt(vmp);
 	return(r);
  }
  rfp->fp_filp[fil_des[1]] = fil_ptr1;
  FD_SET(fil_des[1], &rfp->fp_filp_inuse);
  fil_ptr1->filp_count = 1;
  /* Create a named pipe inode on PipeFS */
  r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
 		  NO_DEV, &res);
  if (r != OK) {
 	/* Undo the claims on both ends, then drop all locks. */
 	rfp->fp_filp[fil_des[0]] = NULL;
 	FD_CLR(fil_des[0], &rfp->fp_filp_inuse);
 	fil_ptr0->filp_count = 0;
 	rfp->fp_filp[fil_des[1]] = NULL;
 	FD_CLR(fil_des[1], &rfp->fp_filp_inuse);
 	fil_ptr1->filp_count = 0;
 	unlock_filp(fil_ptr1);
 	unlock_filp(fil_ptr0);
 	unlock_vnode(vp);
 	unlock_vmnt(vmp);
 	return(r);
  }
  /* Fill in vnode */
  vp->v_fs_e = res.fs_e;
  vp->v_mapfs_e = res.fs_e;
  vp->v_inode_nr = res.inode_nr;
  vp->v_mapinode_nr = res.inode_nr;
  vp->v_mode = res.fmode;
  vp->v_pipe = I_PIPE;
  vp->v_pipe_rd_pos= 0;
  vp->v_pipe_wr_pos= 0;
  vp->v_fs_count = 1;
  vp->v_mapfs_count = 1;
  vp->v_ref_count = 1;
  vp->v_size = 0;
  vp->v_vmnt = NULL;		/* the pipe vnode is not tied to a vmnt */
  vp->v_dev = NO_DEV;
  /* Fill in filp objects */
  fil_ptr0->filp_vno = vp;
  dup_vnode(vp);		/* NOTE(review): presumably the extra reference
 				 * for the write end; confirm in dup_vnode()
 				 */
  fil_ptr1->filp_vno = vp;
  fil_ptr0->filp_flags = O_RDONLY;
  fil_ptr1->filp_flags = O_WRONLY;
  m_out.reply_i1 = fil_des[0];
  m_out.reply_i2 = fil_des[1];
  unlock_filps(fil_ptr0, fil_ptr1);
  unlock_vmnt(vmp);
  return(OK);
 }
 /*===========================================================================*
 *				map_vnode				     *
 *===========================================================================*/
 PUBLIC int map_vnode(vp, map_to_fs_e)
 struct vnode *vp;
 endpoint_t map_to_fs_e;
 {
  int r;
  struct vmnt *vmp;
  struct node_details res;
  if(vp->v_mapfs_e != NONE) return(OK);	/* Already mapped; nothing to do. */
  if ((vmp = find_vmnt(map_to_fs_e)) == NULL)
 	panic("Can't map to unknown endpoint");
  if (lock_vmnt(vmp, VMNT_WRITE) == EBUSY)
 	vmp = NULL;	/* Already locked, do not unlock */
  /* Create a temporary mapping of this inode to another FS. Read and write
   * operations on data will be handled by that FS. The rest by the 'original'
   * FS that holds the inode. */
  if ((r = req_newnode(map_to_fs_e, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
 		       vp->v_dev, &res)) == OK) {
 	vp->v_mapfs_e = res.fs_e;
 	vp->v_mapinode_nr = res.inode_nr;
 	vp->v_mapfs_count = 1;
  }
  if (vmp) unlock_vmnt(vmp);
  return(r);
 }
 /*===========================================================================*
 *				pipe_check				     *
 *===========================================================================*/
 PUBLIC int pipe_check(vp, rw_flag, oflags, bytes, position, notouch)
 register struct vnode *vp;	/* the vnode of the pipe */
 int rw_flag;			/* READING or WRITING */
 int oflags;			/* flags set by open or fcntl */
 register int bytes;		/* bytes to be read or written (all chunks) */
 u64_t position;			/* current file position */
 int notouch;			/* check only */
 {
 /* Decide whether a read or write on a pipe can proceed now.
  *
  * Reading: if data is available, 'bytes' is returned. On an empty pipe with
  * no writer, OK (0 bytes) is returned; with a writer the result is EAGAIN for
  * O_NONBLOCK and SUSPEND otherwise, and suspended writers are released.
  *
  * Writing: without a reader the result is EPIPE (and SIGPIPE is sent unless
  * 'notouch' is set). If the request fits, 'bytes' is returned. If it does not
  * fit, a request larger than PIPE_BUF may be written partially (the available
  * room is returned); otherwise the result is EAGAIN for O_NONBLOCK and
  * SUSPEND for a blocking write.
  */
  off_t pos;
  int room;
  if (ex64hi(position) != 0)
 	panic("pipe_check: position too large in pipe");
  pos = ex64lo(position);
  if (rw_flag == READING) {
 	/* Data available: the read can go ahead. */
 	if (pos < vp->v_size) return(bytes);
 	/* Empty pipe without a writer: report 0 bytes. */
 	if (find_filp(vp, W_BIT) == NULL) return(OK);
 	/* Empty pipe with a writer: wake sleeping writers if need be. */
 	if (susp_count > 0) release(vp, WRITE, susp_count);
 	return((oflags & O_NONBLOCK) ? EAGAIN : SUSPEND);
  }
  /* From here on the process is writing to the pipe. */
  if (find_filp(vp, R_BIT) == NULL) {
 	/* No reader. Tell kernel to generate a SIGPIPE signal. */
 	if (!notouch) sys_kill(fp->fp_endpoint, SIGPIPE);
 	return(EPIPE);
  }
  if (pos + bytes <= PIPE_BUF) {
 	/* Requested amount fits. When writing to an empty pipe, search for
 	 * a suspended reader.
 	 */
 	if (pos == 0 && !notouch) release(vp, READ, susp_count);
 	return(bytes);
  }
  /* The request does not fit. Writes of at most PIPE_BUF bytes have to be
   * atomic; only larger ones may be written partially.
   */
  if (bytes > PIPE_BUF) {
 	room = PIPE_BUF - pos;		/* available space */
 	if (room > 0) {
 		/* Do a partial write. Need to wakeup reader. */
 		if (!notouch) release(vp, READ, susp_count);
 		return(room);
 	}
  }
  /* Pipe is full, or the atomic write does not fit. */
  return((oflags & O_NONBLOCK) ? EAGAIN : SUSPEND);
 }
 /*===========================================================================*
 *				suspend					     *
 *===========================================================================*/
 PUBLIC void suspend(int why)
 {
 /* Put the current system call on hold for reason 'why'. The parameters needed
  * to resume the call are stored in the caller's process table entry. (They
  * are not used when a process is waiting for an I/O device, but they are
  * needed for pipes, and the distinction is not worth making.) The caller
  * should return the SUSPEND pseudo error after calling suspend().
  */
 #if DO_SANITYCHECKS
  if (why == FP_BLOCKED_ON_PIPE) {
 	panic("suspend: called for FP_BLOCKED_ON_PIPE");
  }
  if (fp_is_blocked(fp)) {
 	panic("suspend: called for suspended process");
  }
  if (why == FP_BLOCKED_ON_NONE) {
 	panic("suspend: called for FP_BLOCKED_ON_NONE");
  }
 #endif
  /* Keep track of the number of processes suspended on a pipe. */
  if (why == FP_BLOCKED_ON_POPEN) {
 	susp_count++;
  }
  fp->fp_blocked_on = why;
  assert(fp->fp_grant == GRANT_INVALID || !GRANT_VALID(fp->fp_grant));
  fp->fp_block_fd = m_in.fd;
  fp->fp_block_callnr = call_nr;
  /* Clear this flag. The caller can set it when needed. */
  fp->fp_flags &= ~FP_SUSP_REOPEN;
  if (why != FP_BLOCKED_ON_LOCK) {
 	/* Parameters of a read or write. */
 	fp->fp_buffer = m_in.buffer;
 	fp->fp_nbytes = m_in.nbytes;
  } else {
 	/* Parameters of fcntl(): third and second argument. */
 	fp->fp_buffer = (char *) m_in.name1;
 	fp->fp_nbytes = m_in.request;
  }
 }
 /*===========================================================================*
 *				wait_for				     *
 *===========================================================================*/
 PUBLIC void wait_for(endpoint_t who)
 {
 /* Suspend the calling process with reason FP_BLOCKED_ON_OTHER and record in
  * fp_task that it is waiting for endpoint 'who'. NONE and ANY are not
  * acceptable here and lead to a panic.
  */
  if (who == ANY || who == NONE) {
 	panic("suspend on NONE or ANY");
  }
  suspend(FP_BLOCKED_ON_OTHER);
  fp->fp_task = who;
 }
 /*===========================================================================*
 *				pipe_suspend					     *
 *===========================================================================*/
 PUBLIC void pipe_suspend(rw_flag, fd_nr, buf, size)
 int rw_flag;
 int fd_nr;
 char *buf;
 size_t size;
 {
 /* Mark the calling process as blocked on a pipe and store what is needed to
  * restart the call later: the file descriptor, whether it was a READ or a
  * WRITE, the user buffer and the byte count. The number of processes
  * suspended on pipes (susp_count) is incremented. The caller should return
  * the SUSPEND pseudo error afterwards.
  */
  int callnr;
 #if DO_SANITYCHECKS
  if (fp_is_blocked(fp)) {
 	panic("pipe_suspend: called for suspended process");
  }
 #endif
  if (rw_flag == READING)
 	callnr = READ;
  else
 	callnr = WRITE;
  susp_count++;			/* one more process suspended on a pipe */
  fp->fp_blocked_on = FP_BLOCKED_ON_PIPE;
  assert(!GRANT_VALID(fp->fp_grant));
  fp->fp_block_callnr = callnr;
  fp->fp_block_fd = fd_nr;
  fp->fp_nbytes = size;
  fp->fp_buffer = buf;
 }
 /*===========================================================================*
 *				unsuspend_by_endpt			     *
 *===========================================================================*/
 PUBLIC void unsuspend_by_endpt(endpoint_t proc_e)
 {
 /* The endpoint 'proc_e' has disappeared. Every process that is blocked on it
  * (FP_BLOCKED_ON_OTHER with fp_task equal to 'proc_e') is revived with return
  * code EAGAIN, and select() is told about the endpoint as well.
  */
  struct fproc *rp;
  int slot;
  for (slot = 0; slot < NR_PROCS; slot++) {
 	rp = &fproc[slot];
 	if (rp->fp_pid != PID_FREE &&
 	    rp->fp_blocked_on == FP_BLOCKED_ON_OTHER &&
 	    rp->fp_task == proc_e) {
 		revive(rp->fp_endpoint, EAGAIN);
 	}
  }
  /* Revive processes waiting in drivers on select()s with EAGAIN too */
  select_unsuspend_by_endpt(proc_e);
 }
 /*===========================================================================*
 *				release					     *
 *===========================================================================*/
 PUBLIC void release(vp, op, count)
 register struct vnode *vp;	/* vnode of pipe */
 int op;				/* READ, WRITE, OPEN or CREAT */
 int count;			/* max number of processes to release */
 {
 /* Look for processes hanging on the pipe whose vnode is 'vp'. Those that were
  * trying to perform the call given by 'op' are revived, at most 'count' of
  * them. For READ and WRITE, filps that are SELECTing on the pipe for that
  * operation are notified first.
  */
  register struct fproc *rp;
  struct filp *f, *blocked_filp;
  int selop;
  if (op == READ || op == WRITE) {
 	/* Trying to perform the call also includes SELECTing on it with
 	 * that operation.
 	 */
 	selop = (op == READ) ? SEL_RD : SEL_WR;
 	for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 		if (f->filp_count >= 1 &&
 		    (f->filp_pipe_select_ops & selop) &&
 		    f->filp_vno == vp) {
 			select_callback(f, selop);
 			f->filp_pipe_select_ops &= ~selop;
 		}
 	}
  }
  /* Search the proc table; stop as soon as 'count' processes are released. */
  for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) {
 	if (rp->fp_pid == PID_FREE) continue;
 	if (!fp_is_blocked(rp)) continue;
 	if (rp->fp_flags & FP_REVIVED) continue;
 	if (rp->fp_block_callnr != op) continue;
 	blocked_filp = rp->fp_filp[rp->fp_block_fd];
 	if (blocked_filp == NULL || blocked_filp->filp_vno != vp) continue;
 	revive(rp->fp_endpoint, 0);
 	susp_count--;	/* keep track of who is suspended */
 	if (susp_count < 0)
 		panic("susp_count now negative: %d", susp_count);
 	count--;
  }
 }
 /*===========================================================================*
 *				revive					     *
 *===========================================================================*/
 PUBLIC void revive(proc_nr_e, returned)
 int proc_nr_e;			/* process to revive */
 int returned;			/* if hanging on task, how many bytes read */
 {
 /* Revive a previously blocked process. What happens depends on what the
  * process was blocked on:
  *  - pipe or lock: the process is only flagged FP_REVIVED (and 'reviving' is
  *    incremented); the call has to be restarted so it can try again.
  *  - device open: on error the claimed file descriptor is given back and the
  *    error is sent; otherwise the file descriptor is sent as the reply.
  *  - fifo open, select, TTY or other device: the work is already done and
  *    the reply is sent right away.
  * Invalid endpoints and processes that are not blocked or are already
  * flagged FP_REVIVED are ignored.
  */
  register struct fproc *rfp;
  struct filp *fil_ptr;
  int blocked_on, fd_nr, slot;
  if (proc_nr_e == NONE) return;
  if (isokendpt(proc_nr_e, &slot) != OK) return;
  rfp = &fproc[slot];
  if (!fp_is_blocked(rfp)) return;
  if (rfp->fp_flags & FP_REVIVED) return;
  blocked_on = rfp->fp_blocked_on;
  if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_LOCK) {
 	/* The 'reviving' flag only applies to pipes and locks. */
 	rfp->fp_flags |= FP_REVIVED;
 	reviving++;
 	return;
  }
  /* In all other cases the process stops being blocked right here. */
  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
  if (blocked_on == FP_BLOCKED_ON_DOPEN) {
 	fd_nr = rfp->fp_block_fd;
 	if (returned >= 0) {
 		reply(proc_nr_e, fd_nr);
 		return;
 	}
 	/* The open failed: give the file descriptor and the filp back. */
 	fil_ptr = rfp->fp_filp[fd_nr];
 	lock_filp(fil_ptr, VNODE_OPCL);
 	rfp->fp_filp[fd_nr] = NULL;
 	FD_CLR(fd_nr, &rfp->fp_filp_inuse);
 	if (fil_ptr->filp_count != 1) {
 		panic("VFS: revive: bad count in filp: %d",
 			fil_ptr->filp_count);
 	}
 	fil_ptr->filp_count = 0;
 	unlock_filp(fil_ptr);
 	put_vnode(fil_ptr->filp_vno);
 	fil_ptr->filp_vno = NULL;
 	reply(proc_nr_e, returned);
 	return;
  }
  if (blocked_on == FP_BLOCKED_ON_POPEN) {
 	/* process blocked in open or create */
 	reply(proc_nr_e, rfp->fp_block_fd);
 	return;
  }
  if (blocked_on == FP_BLOCKED_ON_SELECT) {
 	reply(proc_nr_e, returned);
 	return;
  }
  /* Process was suspended on TTY or other device. Pretend it wants only what
   * there is.
   */
  rfp->fp_nbytes = returned;
  /* If a grant has been issued by FS for this I/O, revoke it again now that
   * I/O is done.
   */
  if (GRANT_VALID(rfp->fp_grant)) {
 	if (cpf_revoke(rfp->fp_grant)) {
 		panic("VFS: revoke failed for grant: %d",
 			rfp->fp_grant);
 	}
 	rfp->fp_grant = GRANT_INVALID;
  }
  reply(proc_nr_e, returned);	/* unblock the process */
 }
 /*===========================================================================*
 *				unpause					     *
 *===========================================================================*/
 PUBLIC void unpause(proc_nr_e)
 int proc_nr_e;
 {
 /* A signal has been sent to a user who is paused on the file system.
  * Abort the system call with the EINTR error message.
  *
  * Nothing is done for processes that are not blocked, and an open of a
  * device is never cancelled. For a process blocked on device I/O a CANCEL
  * request is sent to the driver first; if the driver answers SUSPEND, the
  * process is left blocked and no reply is sent here.
  */
  register struct fproc *rfp, *org_fp;
  int slot, blocked_on, fild, status = EINTR, major_dev, minor_dev;
  struct filp *f;
  dev_t dev;
  message mess;
  int wasreviving = 0;
  if (isokendpt(proc_nr_e, &slot) != OK) {
 	printf("VFS: ignoring unpause for bogus endpoint %d\n", proc_nr_e);
 	return;
  }
  rfp = &fproc[slot];
  if (!fp_is_blocked(rfp)) return;
  blocked_on = rfp->fp_blocked_on;
  /* A process flagged FP_REVIVED is no longer counted as revivable; remember
   * that it was, so susp_count is not decremented for it below.
   */
  if (rfp->fp_flags & FP_REVIVED) {
 	rfp->fp_flags &= ~FP_REVIVED;
 	reviving--;
 	wasreviving = 1;
  }
  switch (blocked_on) {
 	case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */
 		break;
 	case FP_BLOCKED_ON_LOCK:/* process trying to set a lock with FCNTL */
 		break;
 	case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
 		select_forget(proc_nr_e);
 		break;
 	case FP_BLOCKED_ON_POPEN:	/* process trying to open a fifo */
 		break;
 	case FP_BLOCKED_ON_DOPEN:/* process trying to open a device */
 		/* Don't cancel OPEN. Just wait until the open completes. */
 		return;
 	case FP_BLOCKED_ON_OTHER:/* process trying to do device I/O (e.g. tty)*/
 		if (rfp->fp_flags & FP_SUSP_REOPEN) {
 			/* Process is suspended while waiting for a reopen.
 			 * Just reply EINTR.
 			 */
 			rfp->fp_flags &= ~FP_SUSP_REOPEN;
 			status = EINTR;
 			break;
 		}
 		fild = rfp->fp_block_fd;
 		if (fild < 0 || fild >= OPEN_MAX)
 			panic("file descriptor out-of-range");
 		f = rfp->fp_filp[fild];
 		dev = (dev_t) f->filp_vno->v_sdev;	/* device hung on */
 		major_dev = major(dev);
 		minor_dev = minor(dev);
 		/* Build the CANCEL request for the driver. */
 		mess.TTY_LINE = minor_dev;
 		mess.USER_ENDPT = rfp->fp_ioproc;
 		mess.IO_GRANT = (char *) rfp->fp_grant;
 		/* Tell kernel R or W. Mode is from current call, not open. */
 		mess.COUNT = rfp->fp_block_callnr == READ ? R_BIT : W_BIT;
 		mess.m_type = CANCEL;
 		/* Temporarily make 'fp' refer to the blocked process while
 		 * the request is sent, then restore it.
 		 */
 		org_fp = fp;
 		fp = rfp;	/* hack - ctty_io uses fp */
 		(*dmap[major_dev].dmap_io)(rfp->fp_task, &mess);
 		fp = org_fp;
 		status = mess.REP_STATUS;
 		if (status == SUSPEND)
 			return;		/* Process will be revived at a
 					 * later time.
 					 */
 		if (status == EAGAIN) status = EINTR;
 		/* The I/O is over; a grant issued for it is revoked. */
 		if (GRANT_VALID(rfp->fp_grant)) {
 			(void) cpf_revoke(rfp->fp_grant);
 			rfp->fp_grant = GRANT_INVALID;
 		}
 		break;
 	default :
 		panic("VFS: unknown block reason: %d", blocked_on);
  }
  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
  /* Processes blocked on a pipe or fifo open are counted in susp_count, but
   * only while they are not yet flagged FP_REVIVED.
   */
  if ((blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_POPEN)&&
 	!wasreviving) {
 	susp_count--;
  }
  reply(proc_nr_e, status);	/* signal interrupted call */
 }
 #if DO_SANITYCHECKS
 /*===========================================================================*
 *				check_pipe			     *
 *===========================================================================*/
 PUBLIC int check_pipe(void)
 {
 /* Integrity check; verify that susp_count equals what the fproc table thinks
  * is suspended on a pipe. A process counts when it is blocked on a pipe or on
  * a fifo open and is not flagged FP_REVIVED. Returns 1 when the numbers
  * agree; otherwise both numbers are printed and 0 is returned.
  */
  struct fproc *rfp;
  int count = 0;
  for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 	if (rfp->fp_pid == PID_FREE) continue;
 	if ( !(rfp->fp_flags & FP_REVIVED) &&
 	    (rfp->fp_blocked_on == FP_BLOCKED_ON_PIPE ||
 	     rfp->fp_blocked_on == FP_BLOCKED_ON_POPEN)) {
 		count++;
 	}
  }
  if (count != susp_count) {
 	printf("check_pipe: count %d susp_count %d\n", count, susp_count);
 	return(0);
  }
  /* The original returned the undeclared identifier 'l' (letter) here. */
  return(1);
 }
 #endif
--- a/servers/avfs/protect.c
+++ b/servers/avfs/protect.c
@ -0,0 +1,274 @@
 /* This file deals with protection in the file system.  It contains the code
 * for four system calls that relate to protection.
 *
 * The entry points into this file are
 *   do_chmod:	perform the CHMOD and FCHMOD system calls
 *   do_chown:	perform the CHOWN and FCHOWN system calls
 *   do_umask:	perform the UMASK system call
 *   do_access:	perform the ACCESS system call
 */
 #include "fs.h"
 #include <unistd.h>
 #include <minix/callnr.h>
 #include "file.h"
 #include "fproc.h"
 #include "path.h"
 #include "param.h"
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 /*===========================================================================*
 *				do_chmod				     *
 *===========================================================================*/
 PUBLIC int do_chmod()
 {
 /* Perform the chmod(name, mode) and fchmod(fd, mode) system calls. */
  struct filp *flp;
  struct vnode *vp;
  struct vmnt *vmp;
  int r;
  mode_t new_mode;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  flp = NULL;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_WRITE;
  if (call_nr == CHMOD) {
 	/* Temporarily open the file */
 	if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK)
 		return(err_code);
 	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  } else {	/* call_nr == FCHMOD */
 	/* File is already opened; get a pointer to vnode from filp. */
 	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
 		return(err_code);
 	vp = flp->filp_vno;
 	dup_vnode(vp);
  }
  /* Only the owner or the super_user may change the mode of a file.
   * No one may change the mode of a file on a read-only file system.
   */
  if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID)
 	r = EPERM;
  else
 	r = read_only(vp);
  if (r == OK) {
 	/* Now make the change. Clear setgid bit if file is not in caller's
 	 * group */
 	if (fp->fp_effuid != SU_UID && vp->v_gid != fp->fp_effgid)
 		m_in.mode &= ~I_SET_GID_BIT;
 	r = req_chmod(vp->v_fs_e, vp->v_inode_nr, m_in.mode, &new_mode);
 	if (r == OK)
 		vp->v_mode = new_mode;
  }
  if (call_nr == CHMOD) {
 	unlock_vnode(vp);
 	unlock_vmnt(vmp);
  } else {	/* FCHMOD */
 	unlock_filp(flp);
  }
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_chown				     *
 *===========================================================================*/
 PUBLIC int do_chown()
 {
 /* Perform the chown(path, owner, group) and fchown(fd, owner, group) system
  * calls. An owner or group of -1 means "leave this id unchanged". For CHOWN
  * the file is looked up by name; for FCHOWN the vnode is taken from the open
  * file descriptor. Returns OK or an error code such as EPERM or EROFS.
  */
  struct filp *flp;
  struct vnode *vp;
  struct vmnt *vmp;
  int r;
  uid_t uid;
  gid_t gid;
  mode_t new_mode;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  flp = NULL;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_WRITE;
  resolve.l_vnode_lock = VNODE_WRITE;
  if (call_nr == CHOWN) {
 	/* Temporarily open the file. */
 	if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 		return(err_code);
 	if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  } else {	/* call_nr == FCHOWN */
 	/* File is already opened; get a pointer to the vnode from filp. */
 	if ((flp = get_filp(m_in.fd, VNODE_WRITE)) == NULL)
 		return(err_code);
 	vp = flp->filp_vno;
 	dup_vnode(vp);
  }
  r = read_only(vp);
  if (r == OK) {
 	/* FS is R/W. Whether call is allowed depends on ownership, etc. */
 	/* The super user can do anything, so check permissions only if we're
 	   a regular user. */
 	if (fp->fp_effuid != SU_UID) {
 		/* Regular users can only change groups of their own files. */
 		if (vp->v_uid != fp->fp_effuid) r = EPERM;
 		/* An id of -1 asks for no change, so it cannot violate the
 		 * two rules below and must not be compared against them.
 		 */
 		if (m_in.owner != (uid_t)-1 && vp->v_uid != m_in.owner)
 			r = EPERM;	/* no giving away */
 		if (m_in.group != (gid_t)-1 && fp->fp_effgid != m_in.group)
 			r = EPERM;
 	}
  }
  if (r == OK) {
 	/* Do not change uid/gid if new uid/gid is -1. */
 	uid = (m_in.owner == (uid_t)-1 ? vp->v_uid : m_in.owner);
 	gid = (m_in.group == (gid_t)-1 ? vp->v_gid : m_in.group);
 	if ((r = req_chown(vp->v_fs_e, vp->v_inode_nr, uid, gid,
 		      &new_mode)) == OK) {
 		vp->v_uid = uid;
 		vp->v_gid = gid;
 		vp->v_mode = new_mode;
 	}
  }
  if (call_nr == CHOWN) {
 	unlock_vnode(vp);
 	unlock_vmnt(vmp);
  } else {	/* FCHOWN */
 	unlock_filp(flp);
  }
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				do_umask				     *
 *===========================================================================*/
 PUBLIC int do_umask()
 {
 /* Perform the umask(co_mode) system call. fp_umask holds the bitwise
  * complement of the mask given by the caller (restricted to RWX_MODES), so
  * the value returned is the complement of the previously stored field.
  */
  mode_t previous;
  previous = ~fp->fp_umask;
  fp->fp_umask = ~(m_in.co_mode & RWX_MODES);
  return(previous);
 }
 /*===========================================================================*
 *				do_access				     *
 *===========================================================================*/
 PUBLIC int do_access()
 {
 /* Perform the access(name, mode) system call. The mode has to be F_OK or a
  * combination of R_OK, W_OK and X_OK, otherwise EINVAL is returned. The file
  * is looked up with read locks and the verdict of forbidden() is returned.
  */
  struct lookup resolve;
  char fullpath[PATH_MAX+1];
  struct vmnt *vmp;
  struct vnode *vp;
  int r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vnode_lock = VNODE_READ;
  resolve.l_vmnt_lock = VMNT_READ;
  /* First check to see if the mode is correct. */
  if (m_in.mode != F_OK && (m_in.mode & ~(R_OK | W_OK | X_OK)) != 0) {
 	return(EINVAL);
  }
  /* Temporarily open the file. */
  if (fetch_name(m_in.name, m_in.name_length, M3, fullpath) != OK) {
 	return(err_code);
  }
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(err_code);
  r = forbidden(vp, m_in.mode);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				forbidden				     *
 *===========================================================================*/
 PUBLIC int forbidden(struct vnode *vp, mode_t access_desired)
 {
 /* Decide whether the caller may access vnode 'vp' in the way described by
  * 'access_desired'. The caller's ids are taken from the 'fproc' table: the
  * real ids for the ACCESS call, the effective ids otherwise. Returns OK if
  * access is allowed, EACCES if it is forbidden, or the result of read_only()
  * when write access is wanted and otherwise permitted.
  */
  register mode_t bits, perm_bits;
  uid_t uid;
  gid_t gid;
  int shift;
  if (vp->v_uid == (uid_t) -1 || vp->v_gid == (gid_t) -1) return(EACCES);
  bits = vp->v_mode;
  if (call_nr == ACCESS)
 	uid = fp->fp_realuid;
  else
 	uid = fp->fp_effuid;
  if (call_nr == ACCESS)
 	gid = fp->fp_realgid;
  else
 	gid = fp->fp_effgid;
  if (uid != SU_UID) {
 	/* Isolate the relevant rwx bits from the mode: owner, group
 	 * (including supplementary groups) or other.
 	 */
 	if (uid == vp->v_uid)
 		shift = 6;
 	else if (gid == vp->v_gid)
 		shift = 3;
 	else if (in_group(fp, vp->v_gid) == OK)
 		shift = 3;
 	else
 		shift = 0;
 	perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
  } else {
 	/* Super user: grant read and write permission. Grant search
 	 * permission for directories. Grant execute permission (for
 	 * non-directories) if and only if one of the 'X' bits is set.
 	 */
 	perm_bits = R_BIT | W_BIT;
 	if ( (bits & I_TYPE) == I_DIRECTORY ||
 	     bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
 		perm_bits = R_BIT | W_BIT | X_BIT;
  }
  /* If access desired is not a subset of what is allowed, it is refused. */
  if ((perm_bits | access_desired) != perm_bits) return(EACCES);
  /* Writing is never allowed on a file system that is mounted read-only. */
  if (access_desired & W_BIT) return(read_only(vp));
  return(OK);
 }
 /*===========================================================================*
 *				read_only				     *
 *===========================================================================*/
 PUBLIC int read_only(vp)
 struct vnode *vp;		/* ptr to vnode whose file sys is to be cked */
 {
 /* Check to see if the file system on which the vnode 'vp' resides is mounted
  * read only.  If so, return EROFS, else return OK.
  *
  * A vnode that is not attached to any vmnt (v_vmnt == NULL, as is the case
  * for the pipe vnodes set up by do_pipe()) is not on a read-only file system,
  * so OK is returned for it instead of dereferencing a null pointer.
  */
  if (vp->v_vmnt == NULL) return(OK);
  return((vp->v_vmnt->m_flags & VMNT_READONLY) ? EROFS : OK);
 }
--- a/servers/avfs/proto.h
+++ b/servers/avfs/proto.h
@ -0,0 +1,372 @@
 #ifndef __VFS_PROTO_H__
 #define __VFS_PROTO_H__
 /* Function prototypes. */
 #include "timers.h"
 #include "request.h"
 #include "tll.h"
 #include "threads.h"
 #include <minix/rs.h>
 /* Structs used in prototypes must be declared as such first. */
 struct filp;
 struct fproc;
 struct vmnt;
 struct vnode;
 struct lookup;
 struct worker_thread;
 struct job;
 typedef struct filp * filp_id_t;
 /* comm.c */
 _PROTOTYPE(int fs_sendrec, (endpoint_t fs_e, message *reqm)		);
 _PROTOTYPE(void fs_sendmore, (struct vmnt *vmp)				);
 _PROTOTYPE(void send_work, (void)					);
 /* device.c */
 _PROTOTYPE( int dev_open, (dev_t dev, endpoint_t proc_e, int flags)	);
 _PROTOTYPE( int dev_reopen, (dev_t dev, int filp_no, int flags)		);
 _PROTOTYPE( int dev_close, (dev_t dev, int filp_no)			);
 _PROTOTYPE( int dev_io, (int op, dev_t dev, endpoint_t proc_e, void *buf,
 		u64_t pos, size_t bytes, int flags, int suspend_reopen)	);
 _PROTOTYPE( int gen_opcl, (int op, dev_t dev, endpoint_t task_nr, int flags));
 _PROTOTYPE( int gen_io, (int task_nr, message *mess_ptr)		);
 _PROTOTYPE( int asyn_io, (int task_nr, message *mess_ptr)		);
 _PROTOTYPE( int no_dev, (int op, dev_t dev, int proc, int flags)	);
 _PROTOTYPE( int no_dev_io, (int, message *)				);
 _PROTOTYPE( int tty_opcl, (int op, dev_t dev, endpoint_t proc, int flags));
 _PROTOTYPE( int ctty_opcl, (int op, dev_t dev, endpoint_t proc, int flags));
 _PROTOTYPE( int clone_opcl, (int op, dev_t dev, int proc, int flags)	);
 _PROTOTYPE( int ctty_io, (int task_nr, message *mess_ptr)		);
 _PROTOTYPE( int do_ioctl, (void)					);
 _PROTOTYPE( void pm_setsid, (int proc_e)				);
 _PROTOTYPE( void dev_status, (message *)				);
 _PROTOTYPE( void dev_up, (int major)					);
 _PROTOTYPE( endpoint_t find_suspended_ep, (endpoint_t driver,
 					   cp_grant_id_t g)		);
 _PROTOTYPE( void reopen_reply, (void)					);
 _PROTOTYPE( void open_reply, (void)					);
 /* dmap.c -- each routine is declared exactly once. */
 _PROTOTYPE( int do_mapdriver, (void)					);
 _PROTOTYPE( void init_dmap, (void)					);
 _PROTOTYPE( int dmap_driver_match, (endpoint_t proc, int major)		);
 _PROTOTYPE( void dmap_endpt_up, (int proc_nr)				);
 _PROTOTYPE( void dmap_unmap_by_endpt, (int proc_nr)			);
 _PROTOTYPE( struct dmap *get_dmap, (endpoint_t proc_e)			);
 _PROTOTYPE( int map_driver, (const char *label, int major, endpoint_t proc_nr,
 	int dev_style, int flags)					);
 _PROTOTYPE( int map_service, (struct rprocpub *rpub)			);
 /* exec.c */
 _PROTOTYPE( int pm_exec, (int proc_e, char *path, vir_bytes path_len,
 			  char *frame, vir_bytes frame_len, vir_bytes *pc));
 /* Sanity check that bsf_lock is currently free: try to take it, assert that
 * this worked, and release it again.  The try-lock is performed outside of
 * assert() so that it still happens when NDEBUG is defined, and the lock is
 * only released when it was actually acquired here.
 */
 #define check_bsf_lock() do {						\
 	int bsf_trylock_r_ = mutex_trylock(&bsf_lock);			\
 	assert(bsf_trylock_r_ == 0);					\
 	if (bsf_trylock_r_ == 0) unlock_bsf();				\
 			} while(0)
 /* filedes.c */
 _PROTOTYPE( void check_filp_locks, (void)				);
 _PROTOTYPE( void check_filp_locks_by_me, (void)				);
 _PROTOTYPE( void init_filps, (void)					);
 _PROTOTYPE( struct filp *find_filp, (struct vnode *vp, mode_t bits)	);
 _PROTOTYPE( int get_fd, (int start, mode_t bits, int *k,
            struct filp **fpt)						);
 _PROTOTYPE( struct filp *get_filp, (int fild, tll_access_t locktype)				);
 _PROTOTYPE( struct filp *get_filp2, (struct fproc *rfp, int fild,
 					tll_access_t locktype)		);
 _PROTOTYPE( void lock_filp, (struct filp *filp, tll_access_t locktype)	);
 _PROTOTYPE( void unlock_filp, (struct filp *filp)			);
 _PROTOTYPE( void unlock_filps, (struct filp *filp1, struct filp *filp2)	);
 _PROTOTYPE( int invalidate, (struct filp *)				);
 _PROTOTYPE( int do_verify_fd, (void)					);
 _PROTOTYPE( int set_filp, (filp_id_t sfilp)				);
 _PROTOTYPE( int do_set_filp, (void)					);
 _PROTOTYPE( int copy_filp, (endpoint_t to_ep, filp_id_t cfilp)		);
 _PROTOTYPE( int do_copy_filp, (void)					);
 _PROTOTYPE( int put_filp, (filp_id_t pfilp)				);
 _PROTOTYPE( int do_put_filp, (void)					);
 _PROTOTYPE( int cancel_fd, (endpoint_t ep, int fd)			);
 _PROTOTYPE( int do_cancel_fd, (void)					);
 _PROTOTYPE( void close_filp, (struct filp *fp)				);
 /* fscall.c */
 _PROTOTYPE( void nested_fs_call, (message *m)				);
 /* link.c */
 _PROTOTYPE( int do_link, (void)						);
 _PROTOTYPE( int do_unlink, (void)					);
 _PROTOTYPE( int do_rename, (void)					);
 _PROTOTYPE( int do_truncate, (void)					);
 _PROTOTYPE( int do_ftruncate, (void)					);
 _PROTOTYPE( int truncate_vnode, (struct vnode *vp, off_t newsize)	);
 _PROTOTYPE( int rdlink_direct, (char *orig_path, char *link_path,
 						struct fproc *rfp)	);
 /* lock.c */
 _PROTOTYPE( int lock_op, (struct filp *f, int req)			);
 _PROTOTYPE( void lock_revive, (void)					);
 /* main.c */
 _PROTOTYPE( int main, (void)						);
 _PROTOTYPE( void reply, (int whom, int result)				);
 _PROTOTYPE( void lock_proc, (struct fproc *rfp, int force_lock)		);
 _PROTOTYPE( void unlock_proc, (struct fproc *rfp)			);
 _PROTOTYPE( void *do_dummy, (void *arg)					);
 /* misc.c */
 _PROTOTYPE( int do_dup, (void)						);
 _PROTOTYPE( void pm_exit, (int proc)					);
 _PROTOTYPE( int do_fcntl, (void)					);
 _PROTOTYPE( void pm_fork, (int pproc, int cproc, int cpid)		);
 _PROTOTYPE( void pm_setgid, (int proc_e, int egid, int rgid)		);
 _PROTOTYPE( void pm_setuid, (int proc_e, int euid, int ruid)		);
 _PROTOTYPE( void pm_setgroups, (int proc_e, int ngroups, gid_t *addr)	);
 _PROTOTYPE( int do_sync, (void)						);
 _PROTOTYPE( int do_fsync, (void)					);
 _PROTOTYPE( void pm_reboot, (void)					);
 _PROTOTYPE( int do_svrctl, (void)					);
 _PROTOTYPE( int do_getsysinfo, (void)					);
 _PROTOTYPE( int pm_dumpcore, (int proc_e, struct mem_map *seg_ptr)	);
 _PROTOTYPE( void ds_event, (void)					);
 /* mount.c */
 _PROTOTYPE( int do_fsready, (void)                                      );
 _PROTOTYPE( int do_mount, (void)					);
 _PROTOTYPE( int do_umount, (void)					);
 _PROTOTYPE( void mount_pfs, (void)					);
 _PROTOTYPE( int mount_fs, (dev_t dev, char fullpath[PATH_MAX+1],
 				   endpoint_t fs_e, int rdonly,
 				   char mount_label[LABEL_MAX])		);
 _PROTOTYPE( int unmount, (dev_t dev, char *label)			);
 _PROTOTYPE( void unmount_all, (void)					);
 /* open.c */
 _PROTOTYPE( int do_close, (void)					);
 _PROTOTYPE( int close_fd, (struct fproc *rfp, int fd_nr)		);
 _PROTOTYPE( void close_reply, (void)					);
 _PROTOTYPE( int do_creat, (void)					);
 _PROTOTYPE( int do_lseek, (void)					);
 _PROTOTYPE( int do_llseek, (void)					);
 _PROTOTYPE( int do_mknod, (void)					);
 _PROTOTYPE( int do_mkdir, (void)					);
 _PROTOTYPE( int do_open, (void)						);
 _PROTOTYPE( int do_slink, (void)                                        );
 _PROTOTYPE( int do_vm_open, (void)					);
 _PROTOTYPE( int do_vm_close, (void)					);
 /* path.c */
 _PROTOTYPE( struct vnode *advance, (struct vnode *dirp, struct lookup *resolve,
 				    struct fproc *rfp)			);
 _PROTOTYPE( struct vnode *eat_path, (struct lookup *resolve,
 				     struct fproc *rfp)			);
 _PROTOTYPE( struct vnode *last_dir, (struct lookup *resolve,
 				     struct fproc *rfp)			);
 _PROTOTYPE( void lookup_init, (struct lookup *resolve, char *path, int flags,
 			       struct vmnt **vmp, struct vnode **vp)	);
 _PROTOTYPE( int get_name, (struct vnode *dirp, struct vnode *entry,
 							char *_name)	);
 _PROTOTYPE( int canonical_path, (char *orig_path, char *canon_path,
 						struct fproc *rfp)	);
 _PROTOTYPE( int do_check_perms, (void)					);
 /* pipe.c */
 _PROTOTYPE( int do_pipe, (void)						);
 _PROTOTYPE( int map_vnode, (struct vnode *vp, endpoint_t fs_e)		);
 _PROTOTYPE( void unpause, (int proc_nr_e)				);
 _PROTOTYPE( int pipe_check, (struct vnode *vp, int rw_flag,
 	      int oflags, int bytes, u64_t position, int notouch)	);
 _PROTOTYPE( void release, (struct vnode *vp, int call_nr, int count)	);
 _PROTOTYPE( void revive, (int proc_nr, int bytes)			);
 _PROTOTYPE( void suspend, (int task)					);
 _PROTOTYPE( void pipe_suspend, (int rw_flag, int fd_nr, char *buf,
 							size_t size)	);
 _PROTOTYPE( void unsuspend_by_endpt, (endpoint_t)			);
 _PROTOTYPE( void wait_for, (endpoint_t)					);
 #if DO_SANITYCHECKS
 _PROTOTYPE( int check_pipe, (void)					);
 #endif
 /* protect.c */
 _PROTOTYPE( int do_access, (void)					);
 _PROTOTYPE( int do_chmod, (void)					);
 _PROTOTYPE( int do_chown, (void)					);
 _PROTOTYPE( int do_umask, (void)					);
 _PROTOTYPE( int forbidden, (struct vnode *vp, mode_t access_desired)	);
 _PROTOTYPE( int read_only, (struct vnode *vp)				);
 /* read.c */
 _PROTOTYPE( int do_read, (void)						);
 _PROTOTYPE( int do_getdents, (void)					);
 _PROTOTYPE( void lock_bsf, (void)					);
 _PROTOTYPE( void unlock_bsf, (void)					);
 _PROTOTYPE( int read_write, (int rw_flag)				);
 _PROTOTYPE( int rw_pipe, (int rw_flag, endpoint_t usr,
 		int fd_nr, struct filp *f, char *buf, size_t req_size)	);
 /* request.c */
 _PROTOTYPE( int req_breadwrite, (endpoint_t fs_e, endpoint_t user_e,
 			dev_t dev, u64_t pos, unsigned int num_of_bytes,
 			char *user_addr, int rw_flag,
 			u64_t *new_posp, unsigned int *cum_iop)		);
 _PROTOTYPE( int req_chmod, (int fs_e, ino_t inode_nr, mode_t rmode,
 						mode_t *new_modep)	);
 _PROTOTYPE( int req_chown, (endpoint_t fs_e, ino_t inode_nr,
 	uid_t newuid, gid_t newgid, mode_t *new_modep)	);
 _PROTOTYPE( int req_create, (int fs_e, ino_t inode_nr, int omode,
 		uid_t uid, gid_t gid, char *path, node_details_t *res)	);
 _PROTOTYPE( int req_flush, (endpoint_t fs_e, dev_t dev)			);
 _PROTOTYPE( int req_fstatfs, (int fs_e, int who_e, char *buf)		);
 _PROTOTYPE( int req_statvfs, (int fs_e, int who_e, char *buf)		);
 _PROTOTYPE( int req_ftrunc, (endpoint_t fs_e, ino_t inode_nr,
 						off_t start, off_t end)	);
 _PROTOTYPE( int req_getdents, (endpoint_t fs_e, ino_t inode_nr,
 			u64_t pos, char *buf, size_t size,
 			u64_t *new_pos, int direct)			);
 _PROTOTYPE( int req_inhibread, (endpoint_t fs_e, ino_t inode_nr)	);
 _PROTOTYPE( int req_link, (endpoint_t fs_e, ino_t link_parent,
 					char *lastc, ino_t linked_file)	);
 _PROTOTYPE( int req_lookup, (endpoint_t fs_e, ino_t dir_ino, ino_t root_ino,
 			     uid_t uid, gid_t gid, struct lookup *resolve,
 			     lookup_res_t *res, struct fproc *rfp)	);
 _PROTOTYPE( int req_mkdir, (endpoint_t fs_e, ino_t inode_nr,
 	char *lastc, uid_t uid, gid_t gid, mode_t dmode)		);
 _PROTOTYPE( int req_mknod, (endpoint_t fs_e, ino_t inode_nr,
 			char *lastc, uid_t uid, gid_t gid,
 			mode_t dmode, dev_t dev)			);
 _PROTOTYPE( int req_mountpoint, (endpoint_t fs_e, ino_t inode_nr)	);
 _PROTOTYPE( int req_newnode, (endpoint_t fs_e, uid_t uid,
 				gid_t gid, mode_t dmode,
 				dev_t dev, struct node_details *res)	);
 _PROTOTYPE( int req_putnode, (int fs_e, ino_t inode_nr, int count)	);
 _PROTOTYPE( int req_rdlink, (endpoint_t fs_e, ino_t inode_nr,
 				endpoint_t who_e, char *buf, size_t len,
 				int direct)				);
 _PROTOTYPE( int req_readsuper, (endpoint_t fs_e, char *driver_name,
 				dev_t dev, int readonly, int isroot,
 				struct node_details *res_nodep)		);
 _PROTOTYPE( int req_readwrite, (endpoint_t fs_e, ino_t inode_nr,
 				u64_t pos, int rw_flag,
 				endpoint_t user_e, char *user_addr,
 				unsigned int num_of_bytes, u64_t *new_posp,
 				unsigned int *cum_iop)			);
 _PROTOTYPE( int req_rename, (endpoint_t fs_e, ino_t old_dir,
 			char *old_name, ino_t new_dir, char *new_name)	);
 _PROTOTYPE( int req_rmdir, (endpoint_t fs_e, ino_t inode_nr,
 							char *lastc)	);
 _PROTOTYPE(int req_slink, (endpoint_t fs_e, ino_t inode_nr, char *lastc,
 		endpoint_t who_e, char *path_addr,
 		unsigned short path_length, uid_t uid, gid_t gid)	);
 _PROTOTYPE( int req_stat, (int fs_e, ino_t inode_nr, int who_e,
 				char *buf, int pos, int stat_version)	);
 _PROTOTYPE( int req_sync, (endpoint_t fs_e)                             );
 _PROTOTYPE( int req_unlink, (endpoint_t fs_e, ino_t inode_nr,
 							char *lastc)	);
 _PROTOTYPE( int req_unmount, (endpoint_t fs_e)                          );
 _PROTOTYPE( int req_utime, (endpoint_t fs_e, ino_t inode_nr,
 					time_t actime, time_t modtime)	);
 _PROTOTYPE( int req_newdriver, (endpoint_t fs_e, dev_t dev,
            endpoint_t driver_e)                                        );
 /* stadir.c */
 _PROTOTYPE( int do_chdir, (void)					);
 _PROTOTYPE( int do_fchdir, (void)					);
 _PROTOTYPE( int do_chroot, (void)					);
 _PROTOTYPE( int do_fstat, (void)					);
 _PROTOTYPE( int do_stat, (void)						);
 _PROTOTYPE( int do_fstatfs, (void)					);
 _PROTOTYPE( int do_statvfs, (void)					);
 _PROTOTYPE( int do_fstatvfs, (void)					);
 _PROTOTYPE( int do_rdlink, (void)					);
 _PROTOTYPE( int do_lstat, (void)					);
 /* time.c */
 _PROTOTYPE( int do_utime, (void)					);
 /* tll.c */
 _PROTOTYPE( void tll_downgrade, (tll_t *tllp)				);
 _PROTOTYPE( int tll_haspendinglock, (tll_t *tllp)			);
 _PROTOTYPE( void tll_init, (tll_t *tllp)				);
 _PROTOTYPE( int tll_islocked, (tll_t *tllp)				);
 _PROTOTYPE( int tll_lock, (tll_t *tllp, tll_access_t locktype)		);
 _PROTOTYPE( int tll_locked_by_me, (tll_t *tllp)				);
 _PROTOTYPE( void tll_lockstat, (tll_t *tllp)				);
 _PROTOTYPE( int tll_unlock, (tll_t *tllp)				);
 _PROTOTYPE( void tll_upgrade, (tll_t *tllp)				);
 /* utility.c */
 _PROTOTYPE( time_t clock_time, (void)					);
 _PROTOTYPE( unsigned conv2, (int norm, int w)				);
 _PROTOTYPE( long conv4, (int norm, long x)				);
 _PROTOTYPE( int fetch_name, (char *path, int len, int flag, char *dest)		);
 _PROTOTYPE( int no_sys, (void)						);
 _PROTOTYPE( int isokendpt_f, (char *f, int l, endpoint_t e, int *p, int ft));
 _PROTOTYPE( int in_group, (struct fproc *rfp, gid_t grp)		);
 #define okendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 1)
 #define isokendpt(e, p) isokendpt_f(__FILE__, __LINE__, (e), (p), 0)
 /* vmnt.c */
 _PROTOTYPE( void check_vmnt_locks, (void)				);
 _PROTOTYPE( void check_vmnt_locks_by_me, (struct fproc *rfp)		);
 _PROTOTYPE( struct vmnt *get_free_vmnt, (void)				);
 _PROTOTYPE( struct vmnt *find_vmnt, (endpoint_t fs_e)			);
 _PROTOTYPE( struct vmnt *get_locked_vmnt, (struct fproc *rfp)		);
 _PROTOTYPE( void init_vmnts, (void)					);
 _PROTOTYPE( int lock_vmnt, (struct vmnt *vp, tll_access_t locktype)	);
 _PROTOTYPE( void unlock_vmnt, (struct vmnt *vp)				);
 /* vnode.c */
 _PROTOTYPE( void check_vnode_locks, (void)				);
 _PROTOTYPE( void check_vnode_locks_by_me, (struct fproc *rfp)		);
 _PROTOTYPE( struct vnode *get_free_vnode, (void)			);
 _PROTOTYPE( struct vnode *find_vnode, (int fs_e, int numb)              );
 _PROTOTYPE( void init_vnodes, (void)					);
 _PROTOTYPE( int is_vnode_locked, (struct vnode *vp)			);
 _PROTOTYPE( int lock_vnode, (struct vnode *vp, tll_access_t locktype)	);
 _PROTOTYPE( void unlock_vnode, (struct vnode *vp)			);
 _PROTOTYPE( void dup_vnode, (struct vnode *vp)                          );
 _PROTOTYPE( void put_vnode, (struct vnode *vp)				);
 _PROTOTYPE( void vnode_clean_refs, (struct vnode *vp)                   );
 #if DO_SANITYCHECKS
 _PROTOTYPE( int check_vrefs, (void)			);
 #endif
 /* write.c */
 _PROTOTYPE( int do_write, (void)					);
 /* gcov.c */
 _PROTOTYPE( int do_gcov_flush, (void)					);
 #if ! USE_COVERAGE
 #define do_gcov_flush no_sys
 #endif
 /* select.c */
 _PROTOTYPE( int do_select, (void)					);
 _PROTOTYPE( void init_select, (void)					);
 _PROTOTYPE( void select_callback, (struct filp *, int ops)		);
 _PROTOTYPE( void select_forget, (endpoint_t proc_e)				);
 _PROTOTYPE( void select_reply1, (endpoint_t driver_e, int minor, int status));
 _PROTOTYPE( void select_reply2, (endpoint_t driver_e, int minor, int status));
 _PROTOTYPE( void select_timeout_check, (timer_t *)			);
 _PROTOTYPE( void select_unsuspend_by_endpt, (endpoint_t proc)		);
 /* worker.c */
 _PROTOTYPE( int worker_available, (void)				);
 _PROTOTYPE( struct worker_thread *worker_get, (thread_t worker_tid)	);
 _PROTOTYPE( struct job *worker_getjob, (thread_t worker_tid)		);
 _PROTOTYPE( void worker_init, (struct worker_thread *worker)		);
 _PROTOTYPE( struct worker_thread *worker_self, (void)			);
 _PROTOTYPE( void worker_start, (void *(*func)(void *arg))		);
 _PROTOTYPE( void worker_signal, (struct worker_thread *worker)		);
 _PROTOTYPE( void worker_wait, (void)					);
 _PROTOTYPE( void sys_worker_start, (void *(*func)(void *arg))		);
 _PROTOTYPE( void dl_worker_start, (void *(*func)(void *arg))		);
 #endif
--- a/servers/avfs/read.c
+++ b/servers/avfs/read.c
@ -0,0 +1,326 @@
 /* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   do_getdents: read entries from a directory (GETDENTS)
 *   read_write: actually do the work of READ and WRITE
 *
 */
 #include "fs.h"
 #include <fcntl.h>
 #include <unistd.h>
 #include <minix/com.h>
 #include <minix/u64.h>
 #include "file.h"
 #include "fproc.h"
 #include "param.h"
 #include <dirent.h>
 #include <assert.h>
 #include <minix/vfsif.h>
 #include "vnode.h"
 #include "vmnt.h"
 /*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
 PUBLIC int do_read()
 {
 /* Entry point for the READ call: delegate to read_write() in READING mode and
 * hand its result back unchanged.
 */
  int result;
  result = read_write(READING);
  return(result);
 }
 /*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
 PUBLIC void lock_bsf(void)
 {
 /* Acquire bsf_lock.  If the lock is free it is taken immediately.  Otherwise
 * the globals m_in, fp and self are saved, the call blocks in mutex_lock(),
 * and the globals are restored afterwards (presumably because other worker
 * threads may run and overwrite them while this one is blocked -- confirm).
 *
 * The mutex_lock() call is deliberately kept outside of assert(): an
 * expression inside assert() is not evaluated when NDEBUG is defined, which
 * would silently skip taking the lock.
 */
  message org_m_in;
  struct fproc *org_fp;
  struct worker_thread *org_self;
  int r;
  /* Fast path: uncontended lock, nothing needs to be saved. */
  if (mutex_trylock(&bsf_lock) == 0)
 	return;
  org_m_in = m_in;
  org_fp = fp;
  org_self = self;
  r = mutex_lock(&bsf_lock);
  assert(r == 0);
  m_in = org_m_in;
  fp = org_fp;
  self = org_self;
 }
 /*===========================================================================*
 *				unlock_bsf				     *
 *===========================================================================*/
 PUBLIC void unlock_bsf(void)
 {
 /* Release bsf_lock.  The mutex_unlock() call is kept outside of assert() so
 * that the lock is still released when NDEBUG is defined; only the check of
 * its result is compiled out in that case.
 */
  int r;
  r = mutex_unlock(&bsf_lock);
  assert(r == 0);
 }
 /*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
 PUBLIC int read_write(rw_flag)
 int rw_flag;			/* READING or WRITING */
 {
 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 *
 * The request parameters are taken from the global message m_in.  The filp is
 * obtained locked from get_filp() and is unlocked again on every return path.
 * Pipes are handed to rw_pipe(); character special files go through dev_io();
 * block special files through req_breadwrite() under the bsf lock; everything
 * else through req_readwrite().
 *
 * Returns the number of bytes transferred when the operation ends with
 * r == OK, and otherwise the (error or status) code produced along the way.
 */
  register struct filp *f;
  register struct vnode *vp;
  u64_t position, res_pos, new_pos;
  unsigned int cum_io, cum_io_incr, res_cum_io;
  int op, oflags, r, block_spec, char_spec, regular;
  tll_access_t locktype;
  mode_t mode_word;
  /* If the file descriptor is valid, get the vnode, size and mode. */
  if (m_in.nbytes < 0) return(EINVAL);
  locktype = (rw_flag == READING) ? VNODE_READ : VNODE_WRITE;
  if ((f = get_filp(m_in.fd, locktype)) == NULL) return(err_code);
  if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
 	/* Decide on the error code while the filp is still locked; the filp
 	 * must not be inspected anymore once it has been unlocked.
 	 */
 	r = (f->filp_mode == FILP_CLOSED ? EIO : EBADF);
 	unlock_filp(f);
 	return(r);
  }
  if (m_in.nbytes == 0) {
 	unlock_filp(f);
 	return(0);	/* so char special files need not check for 0*/
  }
  position = f->filp_pos;
  oflags = f->filp_flags;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;
  if (vp->v_pipe == I_PIPE) {
 	if (fp->fp_cum_io_partial != 0) {
 		panic("VFS: read_write: fp_cum_io_partial not clear");
 	}
 	r = rw_pipe(rw_flag, who_e, m_in.fd, f, m_in.buffer, m_in.nbytes);
 	unlock_filp(f);
 	return(r);
  }
  op = (rw_flag == READING ? VFS_DEV_READ : VFS_DEV_WRITE);
  mode_word = vp->v_mode & I_TYPE;
  regular = mode_word == I_REGULAR;
  if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) {
 	if (vp->v_sdev == NO_DEV)
 		panic("VFS: read_write tries to access char dev NO_DEV");
  }
  if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) {
 	if (vp->v_sdev == NO_DEV)
 		panic("VFS: read_write tries to access block dev NO_DEV");
  }
  if (char_spec) {			/* Character special files. */
 	dev_t dev;
 	int suspend_reopen;
 	suspend_reopen = (f->filp_state != FS_NORMAL);
 	dev = (dev_t) vp->v_sdev;
 	r = dev_io(op, dev, who_e, m_in.buffer, position, m_in.nbytes, oflags,
 		   suspend_reopen);
 	/* A non-negative result is a byte count; fold it into cum_io and the
 	 * position.  Negative results are passed on to the caller as is.
 	 */
 	if (r >= 0) {
 		cum_io = r;
 		position = add64ul(position, r);
 		r = OK;
 	}
  } else if (block_spec) {		/* Block special files. */
 	lock_bsf();
 	r = req_breadwrite(vp->v_bfs_e, who_e, vp->v_sdev, position,
 		m_in.nbytes, m_in.buffer, rw_flag, &res_pos, &res_cum_io);
 	if (r == OK) {
 		position = res_pos;
 		cum_io += res_cum_io;
 	}
 	unlock_bsf();
  } else {				/* Regular files */
 	if (rw_flag == WRITING && block_spec == 0) {
 		/* Check for O_APPEND flag. */
 		if (oflags & O_APPEND) position = cvul64(vp->v_size);
 	}
 	/* Issue request */
 	r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, rw_flag, who_e,
 			  m_in.buffer, m_in.nbytes, &new_pos, &cum_io_incr);
 	if (r >= 0) {
 		if (ex64hi(new_pos))
 			panic("read_write: bad new pos");
 		position = new_pos;
 		cum_io += cum_io_incr;
 	}
  }
  /* On write, grow the cached file size if the position moved past it. */
  if (rw_flag == WRITING) {
 	if (regular || mode_word == I_DIRECTORY) {
 		if (cmp64ul(position, vp->v_size) > 0) {
 			if (ex64hi(position) != 0) {
 				panic("read_write: file size too big ");
 			}
 			vp->v_size = ex64lo(position);
 		}
 	}
  }
  f->filp_pos = position;
  unlock_filp(f);
  if (r == OK) return(cum_io);
  return(r);
 }
 /*===========================================================================*
 *				do_getdents				     *
 *===========================================================================*/
 PUBLIC int do_getdents()
 {
 /* Perform the getdents(fd, buf, size) system call. */
  int r = OK;
  u64_t new_pos;
  register struct filp *rfilp;
  /* Is the file descriptor valid? */
  if ( (rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
  if (!(rfilp->filp_mode & R_BIT))
 	r = EBADF;
  else if ((rfilp->filp_vno->v_mode & I_TYPE) != I_DIRECTORY)
 	r = EBADF;
  if (r == OK) {
 	if (ex64hi(rfilp->filp_pos) != 0)
 		panic("do_getdents: can't handle large offsets");
 	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
 			 rfilp->filp_pos, m_in.buffer, m_in.nbytes,&new_pos,0);
 	if (r > 0) rfilp->filp_pos = new_pos;
  }
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
 PUBLIC int rw_pipe(rw_flag, usr_e, fd_nr, f, buf, req_size)
 int rw_flag;			/* READING or WRITING */
 endpoint_t usr_e;		/* endpoint passed on to req_readwrite */
 int fd_nr;			/* descriptor number, passed to pipe_suspend */
 struct filp *f;		/* filp of the pipe; must be locked by the caller */
 char *buf;			/* user buffer address */
 size_t req_size;		/* number of bytes requested */
 {
 /* Read from or write to the pipe behind filp 'f'.  pipe_check() decides how
 * many bytes may be transferred right now; the transfer itself is done with
 * req_readwrite() on the vnode's v_mapfs_e / v_mapinode_nr.
 *
 * Returns the cumulative number of bytes transferred, SUSPEND when the caller
 * has been suspended through pipe_suspend(), or the non-positive result of
 * pipe_check() / the error result of req_readwrite().
 */
  int r, oflags, partial_pipe = 0;
  size_t size, cum_io, cum_io_incr;
  struct vnode *vp;
  u64_t position, new_pos;
  /* Must make sure we're operating on locked filp and vnode */
  assert(tll_islocked(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
  oflags = f->filp_flags;
  vp = f->filp_vno;
  /* Readers and writers each have their own position in the pipe. */
  position = cvu64((rw_flag == READING) ? vp->v_pipe_rd_pos :
 							vp->v_pipe_wr_pos);
  /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
  cum_io = fp->fp_cum_io_partial;
  r = pipe_check(vp, rw_flag, oflags, req_size, position, 0);
  /* A non-positive result ends the call here; for SUSPEND the request is
   * recorded with pipe_suspend() first.
   */
  if (r <= 0) {
 	if (r == SUSPEND) pipe_suspend(rw_flag, fd_nr, buf, req_size);
 	return(r);
  }
  /* A positive result is used as the number of bytes to transfer now. */
  size = r;
  if (size < req_size) partial_pipe = 1;
  /* Truncate read request at size. */
  if((rw_flag == READING) &&
 	cmp64ul(add64ul(position, size), vp->v_size) > 0) {
 	/* Position always should fit in an off_t (LONG_MAX). */
 	off_t pos32;
 	assert(cmp64ul(position, LONG_MAX) <= 0);
 	pos32 = cv64ul(position);
 	assert(pos32 >= 0);
 	assert(pos32 <= LONG_MAX);
 	size = vp->v_size - pos32;
  }
  if (vp->v_mapfs_e == 0)
 	panic("unmapped pipe");
  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
 		    buf, size, &new_pos, &cum_io_incr);
  /* On success, account for the bytes moved and advance the buffer and the
   * remaining request size, which are needed if the caller gets suspended
   * below.
   */
  if (r >= 0) {
 	if (ex64hi(new_pos))
 		panic("rw_pipe: bad new pos");
 	position = new_pos;
 	cum_io += cum_io_incr;
 	buf += cum_io_incr;
 	req_size -= cum_io_incr;
  }
  /* On write, grow the cached size if the position moved past it.  On read,
   * reset the pipe once the read position has caught up with the size.
   */
  if (rw_flag == WRITING) {
 	if (cmp64ul(position, vp->v_size) > 0) {
 		if (ex64hi(position) != 0) {
 			panic("read_write: file size too big for v_size");
 		}
 		vp->v_size = ex64lo(position);
 	}
  } else {
 	if (cmp64ul(position, vp->v_size) >= 0) {
 		/* Reset pipe pointers */
 		vp->v_size = 0;
 		vp->v_pipe_rd_pos= 0;
 		vp->v_pipe_wr_pos= 0;
 		position = cvu64(0);
 	}
  }
  /* Store the updated position back into the matching pipe pointer. */
  if (rw_flag == READING)
 	vp->v_pipe_rd_pos= cv64ul(position);
  else
 	vp->v_pipe_wr_pos= cv64ul(position);
  if (r == OK) {
 	if (partial_pipe) {
 		/* Less than the requested size was transferred.  With
 		 * O_NONBLOCK set, fall through and return the count so far.
 		 */
 		if (!(oflags & O_NONBLOCK)) {
 			/* partial write on pipe with req_size > PIPE_SIZE,
 			 * non-atomic
 			 */
 			/* Remember the progress and suspend for the rest. */
 			fp->fp_cum_io_partial = cum_io;
 			pipe_suspend(rw_flag, fd_nr, buf, req_size);
 			return(SUSPEND);
 		}
 	}
 	fp->fp_cum_io_partial = 0;
 	return(cum_io);
  }
  return(r);
 }
--- a/servers/avfs/request.c
+++ b/servers/avfs/request.c
--- a/servers/avfs/request.h
+++ b/servers/avfs/request.h
@ -0,0 +1,41 @@
 #ifndef __VFS_REQUEST_H__
 #define __VFS_REQUEST_H__
 /* Low level request messages are built and sent by wrapper functions.
 * This file contains the request and response structures for accessing
 * those wrappers functions.
 */
 #include <sys/types.h>
 /* Structure for response that contains inode details.  It is the result
 * type of the req_create, req_newnode and req_readsuper wrappers declared in
 * proto.h.
 */
 typedef struct node_details {
  endpoint_t fs_e;
  ino_t inode_nr;
  mode_t fmode;
  off_t fsize;
  uid_t uid;
  gid_t gid;
  /* For char/block special files */
  dev_t dev;
 } node_details_t;
 /* Structure for a lookup response.  It is the result type of the req_lookup
 * wrapper declared in proto.h.  The leading fields have the same names and
 * types as those of struct node_details.
 */
 typedef struct lookup_res {
  endpoint_t fs_e;
  ino_t inode_nr;
  mode_t fmode;
  off_t fsize;
  uid_t uid;
  gid_t gid;
  /* For char/block special files */
  dev_t dev;
  /* Fields used for handling mount point and symbolic links */
  int char_processed;
  unsigned char symloop;
 } lookup_res_t;
 #endif
--- a/servers/avfs/select.c
+++ b/servers/avfs/select.c
--- a/servers/avfs/select.h
+++ b/servers/avfs/select.h
@ -0,0 +1,9 @@
 #ifndef __VFS_SELECT_H__
 #define __VFS_SELECT_H__
 /* return codes for select_request_* and select_cancel_* */
 #define SEL_OK		0	/* ready */
 #define SEL_ERROR	1	/* failed */
 #define SEL_DEFERRED	2	/* request is sent to driver */
 #endif
--- a/servers/avfs/stadir.c
+++ b/servers/avfs/stadir.c
@ -0,0 +1,287 @@
 /* This file contains the code for performing several system calls relating to
 * status and directories.
 *
 * The entry points into this file are
 *   do_chdir:	perform the CHDIR system call
 *   do_chroot:	perform the CHROOT system call
 *   do_lstat:  perform the LSTAT system call
 *   do_stat:	perform the STAT system call
 *   do_fstat:	perform the FSTAT system call
 *   do_fstatfs: perform the FSTATFS system call
 *   do_statvfs: perform the STATVFS system call
 *   do_fstatvfs: perform the FSTATVFS system call
 */
 #include "fs.h"
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <minix/com.h>
 #include <minix/u64.h>
 #include <string.h>
 #include "file.h"
 #include "fproc.h"
 #include "path.h"
 #include "param.h"
 #include <minix/vfsif.h>
 #include <minix/callnr.h>
 #include "vnode.h"
 #include "vmnt.h"
 FORWARD _PROTOTYPE( int change, (struct vnode **iip, char *name_ptr, int len));
 FORWARD _PROTOTYPE( int change_into, (struct vnode **iip, struct vnode *vp));
 /*===========================================================================*
 *				do_fchdir				     *
 *===========================================================================*/
 PUBLIC int do_fchdir()
 {
  /* Change directory on already-opened fd. */
  struct filp *rfilp;
  int r;
  /* Is the file descriptor valid? */
  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
  r = change_into(&fp->fp_wd, rfilp->filp_vno);
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				do_chdir				     *
 *===========================================================================*/
 PUBLIC int do_chdir()
 {
 /* Perform the chdir(name) system call by letting change() replace the
 * caller's working directory (fp_wd).
 */
  int result;
  result = change(&fp->fp_wd, m_in.name, m_in.name_length);
  return(result);
 }
 /*===========================================================================*
 *				do_chroot				     *
 *===========================================================================*/
 PUBLIC int do_chroot()
 {
 /* Perform the chroot(name) system call by letting change() replace the
 * caller's root directory (fp_rd).  Only the super user may do this; anyone
 * else gets EPERM.
 */
  if (super_user)
 	return change(&fp->fp_rd, m_in.name, m_in.name_length);
  return(EPERM);
 }
 /*===========================================================================*
 *				change					     *
 *===========================================================================*/
 PRIVATE int change(iip, name_ptr, len)
 struct vnode **iip;		/* pointer to the inode pointer for the dir */
 char *name_ptr;			/* pointer to the directory name to change to */
 int len;			/* length of the directory name string */
 {
 /* Do the actual work for chdir() and chroot(). */
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  int r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  /* Try to open the directory */
  if (fetch_name(name_ptr, len, M3, fullpath) != OK) return(err_code);
  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  r = change_into(iip, vp);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
 /*===========================================================================*
 *				change_into				     *
 *===========================================================================*/
 PRIVATE int change_into(iip, vp)
 struct vnode **iip;		/* pointer to the inode pointer for the dir */
 struct vnode *vp;		/* this is what the inode has to become */
 {
 /* Make '*iip' refer to 'vp'.  Returns OK when nothing has to change or when
 * the switch was made, ENOTDIR when 'vp' is not a directory, or the result of
 * forbidden() when the directory may not be searched.
 */
  int r;
  if (*iip == vp) return(OK);	/* Nothing to do */
  /* It must be a directory ... */
  if ((vp->v_mode & I_TYPE) != I_DIRECTORY) return(ENOTDIR);
  /* ... and also be searchable. */
  if ((r = forbidden(vp, X_BIT)) != OK) return(r);
  /* Everything is OK.  Make the change. */
  put_vnode(*iip);		/* release the old directory */
  dup_vnode(vp);
  *iip = vp;			/* acquire the new one */
  return(OK);
 }
 /*===========================================================================*
 *				do_stat					     *
 *===========================================================================*/
 PUBLIC int do_stat()
 {
 /* Perform the stat(name, buf) system call. */
  int r;
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  int old_stat = 0;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  if (call_nr == PREV_STAT)
 	old_stat = 1;
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return r;
 }
 /*===========================================================================*
 *				do_fstat				     *
 *===========================================================================*/
 PUBLIC int do_fstat()
 {
 /* Perform the fstat(fd, buf) system call. */
  register struct filp *rfilp;
  int r;
  int pipe_pos = 0;
  int old_stat = 0;
  if (call_nr == PREV_FSTAT)
 	old_stat = 1;
  /* Is the file descriptor valid? */
  if ((rfilp = get_filp(m_in.fd, VNODE_READ)) == NULL) return(err_code);
  /* If we read from a pipe, send position too */
  if (rfilp->filp_vno->v_pipe == I_PIPE) {
 	if (rfilp->filp_mode & R_BIT)
 		if (ex64hi(rfilp->filp_pos) != 0) {
 			panic("do_fstat: bad position in pipe");
 		}
 	pipe_pos = ex64lo(rfilp->filp_pos);
  }
  r = req_stat(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
 	       who_e, m_in.buffer, pipe_pos, old_stat);
  unlock_filp(rfilp);
  return(r);
 }
 /*===========================================================================*
 *				do_fstatfs				     *
 *===========================================================================*/
 PUBLIC int do_fstatfs()
 {
 /* Perform the fstatfs(fd, buf) system call by forwarding it, with
 * req_fstatfs(), to the file system endpoint of the descriptor's vnode.
 */
  struct filp *statfilp;
  int result;
  /* Is the file descriptor valid? */
  statfilp = get_filp(m_in.fd, VNODE_READ);
  if (statfilp == NULL) return(err_code);
  result = req_fstatfs(statfilp->filp_vno->v_fs_e, who_e, m_in.buffer);
  unlock_filp(statfilp);
  return(result);
 }
 /*===========================================================================*
 *				do_statvfs					     *
 *===========================================================================*/
 PUBLIC int do_statvfs()
 {
 /* Perform the stat(name, buf) system call. */
  int r;
  struct vnode *vp;
  struct vmnt *vmp;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  if (fetch_name(m_in.STATVFS_NAME, m_in.STATVFS_LEN, M1, fullpath) != OK)
 	return(err_code);
  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  r = req_statvfs(vp->v_fs_e, who_e, m_in.STATVFS_BUF);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return r;
 }
 /*===========================================================================*
 *				do_fstatvfs				     *
 *===========================================================================*/
 PUBLIC int do_fstatvfs()
 {
 /* Perform the fstatvfs(fd, buf) system call by forwarding it, with
 * req_statvfs(), to the file system endpoint of the descriptor's vnode.
 */
  register struct filp *statfilp;
  int result;
  /* Is the file descriptor valid? */
  statfilp = get_filp(m_in.FSTATVFS_FD, VNODE_READ);
  if (statfilp == NULL)
 	return(err_code);
  result = req_statvfs(statfilp->filp_vno->v_fs_e, who_e, m_in.FSTATVFS_BUF);
  unlock_filp(statfilp);
  return(result);
 }
 /*===========================================================================*
 *                             do_lstat					     *
 *===========================================================================*/
 PUBLIC int do_lstat()
 {
 /* Perform the lstat(name, buf) system call. */
  struct vnode *vp;
  struct vmnt *vmp;
  int r;
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  int old_stat = 0;
  lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;
  if (call_nr == PREV_LSTAT)
 	old_stat = 1;
  if (fetch_name(m_in.name1, m_in.name1_length, M1, fullpath) != OK)
 	return(err_code);
  if ((vp = eat_path(&resolve, fp)) == NULL) return(err_code);
  r = req_stat(vp->v_fs_e, vp->v_inode_nr, who_e, m_in.name2, 0, old_stat);
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
--- a/servers/avfs/table.c
+++ b/servers/avfs/table.c
@ -0,0 +1,145 @@
 /* This file contains the table used to map system call numbers onto the
 * routines that perform them.
 */
 #define _TABLE
 #include "fs.h"
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include "file.h"
 #include "fproc.h"
 #include "lock.h"
 #include "vnode.h"
 #include "vmnt.h"
 /* Dispatch table: entry N is the handler for system call number N. Calls
 * that this server does not handle are routed to no_sys. The comment on each
 * line gives the call number and name; names in parentheses mark calls that
 * map to no_sys here.
 */
 PUBLIC _PROTOTYPE (int (*call_vec[]), (void) ) = {
 	no_sys,		/*  0 = unused	*/
 	no_sys,		/*  1 = (exit)	*/
 	no_sys,		/*  2 = (fork)	*/
 	do_read,	/*  3 = read	*/
 	do_write,	/*  4 = write	*/
 	do_open,	/*  5 = open	*/
 	do_close,	/*  6 = close	*/
 	no_sys,		/*  7 = wait	*/
 	do_creat,	/*  8 = creat	*/
 	do_link,	/*  9 = link	*/
 	do_unlink,	/* 10 = unlink	*/
 	no_sys,		/* 11 = waitpid	*/
 	do_chdir,	/* 12 = chdir	*/
 	no_sys,		/* 13 = time	*/
 	do_mknod,	/* 14 = mknod	*/
 	do_chmod,	/* 15 = chmod	*/
 	do_chown,	/* 16 = chown	*/
 	no_sys,		/* 17 = break	*/
 	do_stat,	/* 18 = stat (prev)*/
 	do_lseek,	/* 19 = lseek	*/
 	no_sys,		/* 20 = getpid	*/
 	do_mount,	/* 21 = mount	*/
 	do_umount,	/* 22 = umount	*/
 	no_sys,		/* 23 = (setuid) */
 	no_sys,		/* 24 = getuid	*/
 	no_sys,		/* 25 = (stime)	*/
 	no_sys,		/* 26 = ptrace	*/
 	no_sys,		/* 27 = alarm	*/
 	do_fstat,	/* 28 = fstat (prev)*/
 	no_sys,		/* 29 = pause	*/
 	do_utime,	/* 30 = utime	*/
 	no_sys,		/* 31 = (stty)	*/
 	no_sys,		/* 32 = (gtty)	*/
 	do_access,	/* 33 = access	*/
 	no_sys,		/* 34 = (nice)	*/
 	no_sys,		/* 35 = (ftime)	*/
 	do_sync,	/* 36 = sync	*/
 	no_sys,		/* 37 = kill	*/
 	do_rename,	/* 38 = rename	*/
 	do_mkdir,	/* 39 = mkdir	*/
 	do_unlink,	/* 40 = rmdir	*/
 	do_dup,		/* 41 = dup	*/
 	do_pipe,	/* 42 = pipe	*/
 	no_sys,		/* 43 = times	*/
 	no_sys,		/* 44 = (prof)	*/
 	do_slink,	/* 45 = symlink	*/
 	no_sys,		/* 46 = (setgid)*/
 	no_sys,		/* 47 = getgid	*/
 	no_sys,		/* 48 = (signal)*/
 	do_rdlink,	/* 49 = readlink*/
 	do_lstat,	/* 50 = lstat (prev)*/
 	no_sys,		/* 51 = (acct)	*/
 	no_sys,		/* 52 = (phys)	*/
 	no_sys,		/* 53 = (lock)	*/
 	do_ioctl,	/* 54 = ioctl	*/
 	do_fcntl,	/* 55 = fcntl	*/
 	no_sys,		/* 56 = (mpx)	*/
 	do_fsready,	/* 57 = FS proc ready */
 	no_sys,		/* 58 = unused	*/
 	no_sys,		/* 59 = (execve)*/
 	do_umask,	/* 60 = umask	*/
 	do_chroot,	/* 61 = chroot	*/
 	no_sys,		/* 62 = (setsid)*/
 	no_sys,		/* 63 = (getpgrp)*/
 	no_sys,		/* 64 = (itimer)*/
 	do_stat,	/* 65 = stat	*/
 	do_fstat, 	/* 66 = fstat   */
 	do_lstat,	/* 67 = lstat	*/
 	no_sys,		/* 68 = unused	*/
 	no_sys,		/* 69 = unused  */
 	no_sys,		/* 70 = unused  */
 	no_sys,		/* 71 = (sigaction) */
 	no_sys,		/* 72 = (sigsuspend) */
 	no_sys,		/* 73 = (sigpending) */
 	no_sys,		/* 74 = (sigprocmask) */
 	no_sys,		/* 75 = (sigreturn) */
 	no_sys,		/* 76 = (reboot) */
 	do_svrctl,	/* 77 = svrctl */
 	no_sys,		/* 78 = (sysuname) */
 	do_getsysinfo,  /* 79 = getsysinfo */
 	do_getdents,	/* 80 = getdents */
 	do_llseek,	/* 81 = llseek */
 	do_fstatfs,	/* 82 = fstatfs */
 	do_statvfs,		/* 83 = statvfs */
 	do_fstatvfs,		/* 84 = fstatvfs */
 	do_select,	/* 85 = select */
 	do_fchdir,	/* 86 = fchdir */
 	do_fsync,	/* 87 = fsync */
 	no_sys,		/* 88 = (getpriority) */
 	no_sys,		/* 89 = (setpriority) */
 	no_sys,		/* 90 = (gettimeofday) */
 	no_sys,		/* 91 = (seteuid) */
 	no_sys,		/* 92 = (setegid) */
 	do_truncate,	/* 93 = truncate */
 	do_ftruncate,	/* 94 = ftruncate */
 	do_chmod,	/* 95 = fchmod */
 	do_chown,	/* 96 = fchown */
 	no_sys,		/* 97 = (getsysinfo_up) */
 	no_sys,		/* 98 = (sprofile) */
 	no_sys,		/* 99 = (cprofile) */
 	/* THE MINIX3 ABI ENDS HERE */
 	no_sys,		/* 100 = (exec_newmem) */
 	no_sys,		/* 101 = (srv_fork) */
 	no_sys,		/* 102 = (exec_restart) */
 	no_sys,		/* 103 = (procstat) */
 	no_sys,		/* 104 = (getprocnr) */
 	no_sys,		/* 105 = unused */
 	no_sys,		/* 106 = unused */
 	no_sys,		/* 107 = (getepinfo) */
 	no_sys,		/* 108 = (adddma) */
 	no_sys,		/* 109 = (deldma) */
 	no_sys,		/* 110 = (getdma) */
 	no_sys,		/* 111 = (srv_kill) */
 	do_gcov_flush,	/* 112 = gcov_flush */
 	no_sys,		/* 113 = (getsid) */
 };
 /* Compile-time check that call_vec has exactly NCALLS entries. If the table
 * and NCALLS get out of sync, the array below is declared with size -1 and
 * compilation fails with "array size is negative".
 */
 extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 1 : -1];
 /* Second dispatch table, with seven entries (indices 0 through 6).
 * NOTE(review): judging by the name, these are the handlers for requests
 * coming from PFS; how the index is derived from the request is not visible
 * in this file -- confirm against the caller.
 */
 PUBLIC _PROTOTYPE (int (*pfs_call_vec[]), (void) ) = {
 	no_sys,		/* 0 */
 	do_check_perms,	/* 1 */
 	do_verify_fd,	/* 2 */
 	do_set_filp,	/* 3 */
 	do_copy_filp,	/* 4 */
 	do_put_filp,	/* 5 */
 	do_cancel_fd	/* 6 */
 };
--- a/servers/avfs/threads.h
+++ b/servers/avfs/threads.h
@ -0,0 +1,35 @@
 #ifndef __VFS_WORKERS_H__
 #define __VFS_WORKERS_H__
 #include <minix/mthread.h>
 #include "job.h"
 /* Short local aliases for the mthread types ... */
 #define thread_t	mthread_thread_t
 #define mutex_t		mthread_mutex_t
 #define cond_t		mthread_cond_t
 #define attr_t		mthread_attr_t
 /* ... and for the mthread functions. */
 #define threads_init	mthread_init
 #define yield		mthread_yield
 #define yield_all	mthread_yield_all
 #define mutex_init	mthread_mutex_init
 #define mutex_destroy	mthread_mutex_destroy
 #define mutex_lock	mthread_mutex_lock
 #define mutex_trylock	mthread_mutex_trylock
 #define mutex_unlock	mthread_mutex_unlock
 #define cond_init	mthread_cond_init
 #define cond_destroy	mthread_cond_destroy
 #define cond_wait	mthread_cond_wait
 #define cond_signal	mthread_cond_signal
 /* Per-worker-thread bookkeeping. */
 struct worker_thread {
  thread_t w_tid;		/* mthread id; presumably of this worker */
  mutex_t w_event_mutex;	/* presumably guards w_event -- TODO confirm */
  cond_t w_event;		/* presumably what the worker sleeps on while
 				 * waiting to be signalled -- TODO confirm */
  struct job w_job;		/* job descriptor (see job.h) */
  struct fproc *w_fp;		/* presumably the process being served */
  struct worker_thread *w_next;	/* link used when the worker is queued, e.g.
 				 * on a three-level-lock wait queue */
 };
 #endif
--- a/servers/avfs/time.c
+++ b/servers/avfs/time.c
@ -0,0 +1,66 @@
 /* This file takes care of those system calls that deal with time.
 *
 * The entry points into this file are
 *   do_utime:		perform the UTIME system call
 */
 #include "fs.h"
 #include <minix/callnr.h>
 #include <minix/com.h>
 #include "file.h"
 #include "fproc.h"
 #include "path.h"
 #include "param.h"
 #include "vnode.h"
 #include <minix/vfsif.h>
 #include "vmnt.h"
 /*===========================================================================*
 *				do_utime				     *
 *===========================================================================*/
 PUBLIC int do_utime()
 {
 /* Perform the utime(name, timep) system call. */
  char fullpath[PATH_MAX+1];
  struct lookup resolve;
  struct vmnt *vmp;
  struct vnode *vp;
  time_t new_atime, new_mtime;
  int name_len, r;
  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
  resolve.l_vnode_lock = VNODE_READ;
  resolve.l_vmnt_lock = VMNT_WRITE;
  /* When 'timep' is NULL, utime_length is 0 and utime_strlen carries the
   * actual size of the name instead: strlen(name)+1 */
  name_len = m_in.utime_length;
  if (name_len == 0) name_len = m_in.utime_strlen;
  /* Copy in the path name and resolve it to a vnode. */
  if (fetch_name(m_in.utime_file, name_len, M1, fullpath) != OK)
 	return(err_code);
  vp = eat_path(&resolve, fp);
  if (vp == NULL) return(err_code);
  /* Only the owner of a file or the super user may set its times. With a
   * NULL 'timep', anyone who passes the W_BIT permission check may too. */
  r = OK;
  if (fp->fp_effuid != SU_UID && vp->v_uid != fp->fp_effuid) r = EPERM;
  if (r != OK && m_in.utime_length == 0) r = forbidden(vp, W_BIT);
  if (read_only(vp) != OK) r = EROFS; /* Not even su can touch if R/O */
  if (r == OK) {
 	/* A NULL 'timep' means: set both times to the current time. */
 	if (m_in.utime_length != 0) {
 		new_atime = m_in.utime_actime;
 		new_mtime = m_in.utime_modtime;
 	} else {
 		new_atime = new_mtime = clock_time();
 	}
 	/* Issue request */
 	r = req_utime(vp->v_fs_e, vp->v_inode_nr, new_atime, new_mtime);
  }
  unlock_vnode(vp);
  unlock_vmnt(vmp);
  put_vnode(vp);
  return(r);
 }
--- a/servers/avfs/tll.c
+++ b/servers/avfs/tll.c
@ -0,0 +1,310 @@
 /* This file contains the implementation of the three-level-lock. */
 #include "fs.h"
 #include "glo.h"
 #include "tll.h"
 #include "threads.h"
 #include <assert.h>
 FORWARD _PROTOTYPE( int tll_append, (tll_t *tllp, tll_access_t locktype));
 PRIVATE int tll_append(tll_t *tllp, tll_access_t locktype)
 {
 /* Queue the calling worker thread ('self') on lock tllp for access of type
 * locktype, block until it is signalled, and then record the lock as held
 * with that type. Always returns OK.
 */
  struct worker_thread *queue;
  assert(self != NULL);
  assert(tllp != NULL);
  assert(locktype != TLL_NONE);
  /* Read-only and write-only requests go to the write queue. Read-serialized
   * requests go to the serial queue. Then we wait for an event to signal it's
   * our turn to go. */
  queue = NULL;
  if (locktype == TLL_READ || locktype == TLL_WRITE) {
 	if (tllp->t_write == NULL)
 		tllp->t_write = self;
 	else
 		queue = tllp->t_write;
  } else {
 	if (tllp->t_serial == NULL)
 		tllp->t_serial = self;
 	else
 		queue = tllp->t_serial;
  }
  if (queue != NULL) {	/* Traverse to end of queue */
 	while (queue->w_next != NULL) queue = queue->w_next;
 	queue->w_next = self;
  }
  self->w_next = NULL; /* End of queue */
  /* Now wait for the event it's our turn */
  worker_wait();
  /* We were woken up: take the lock and clear the pending-grant flag that
   * the thread that signalled us had set. */
  tllp->t_current = locktype;
  tllp->t_status &= ~TLL_PEND;
  tllp->t_owner = self;
  if (tllp->t_current == TLL_READ) {
 	/* Read-only access is counted, not owned */
 	tllp->t_readonly++;
 	tllp->t_owner = NULL;
  }
  if (verbose) {
 	printf("got lock on tllp=%p with type %d (self=%p)\n", tllp,
 			locktype, self);
  }
  /* Due to the way upgrading and downgrading works, read-only requests are
   * scheduled to run after a downgraded lock is released (because they are
   * queued on the write-only queue which has priority). This results from the
   * fact that the downgrade operation cannot know whether the next locktype on
   * the write-only queue is really write-only or actually read-only. However,
   * that means that read-serialized requests stay queued, while they could run
   * simultaneously with read-only requests. See if there are any and grant
   * the head request access */
  if (tllp->t_current == TLL_READ && tllp->t_serial != NULL) {
 	tllp->t_owner = tllp->t_serial;
 	tllp->t_serial = tllp->t_serial->w_next;
 	tllp->t_owner->w_next = NULL;
 	assert(!(tllp->t_status & TLL_PEND));
 	tllp->t_status |= TLL_PEND;
 	worker_signal(tllp->t_owner);
  }
  return(OK);
 }
 PUBLIC void tll_downgrade(tll_t *tllp)
 {
 /* Downgrade three-level-lock tll from write-only to read-serialized, or from
 * read-serialized to read-only. Caveat: as we can't know whether the next
 * lock type on the write queue is actually read-only or write-only, we can't
 * grant access to that type. It will be granted access once we unlock. Also,
 * because we apply write-bias, we can't grant access to read-serialized
 * either, unless nothing is queued on the write-only queue.
 * The caller must be the current owner of the lock; any other lock state
 * than write-only or read-serialized causes a panic. */
  assert(self != NULL);
  assert(tllp != NULL);
  assert(tllp->t_owner == self);
  switch(tllp->t_current) {
    case TLL_WRITE: tllp->t_current = TLL_READSER; break;
    case TLL_READSER:
 	/* If nothing is queued on write-only, but there is a pending lock
 	 * requesting read-serialized, grant it and keep the lock type. */
 	if (tllp->t_write == NULL && tllp->t_serial != NULL) {
 		tllp->t_owner = tllp->t_serial;
 		tllp->t_serial = tllp->t_serial->w_next; /* Remove head */
 		tllp->t_owner->w_next = NULL;
 		assert(!(tllp->t_status & TLL_PEND));
 		tllp->t_status |= TLL_PEND;
 		worker_signal(tllp->t_owner);
 	} else {
 		/* Nobody to hand ownership to: plain read-only lock now */
 		tllp->t_current = TLL_READ;
 		tllp->t_owner = NULL;
 	}
 	tllp->t_readonly++; /* Either way, there's one more read-only lock */
 	break;
    default: panic("VFS: Incorrect lock state");
  }
 }
 PUBLIC void tll_init(tll_t *tllp)
 {
 /* Reset three-level-lock tll to its pristine state: held by nobody, no
 * readers, default status, and both wait queues empty.
 */
  assert(tllp != NULL);
  tllp->t_owner = NULL;
  tllp->t_serial = NULL;
  tllp->t_write = NULL;
  tllp->t_status = TLL_DFLT;
  tllp->t_readonly = 0;
  tllp->t_current = TLL_NONE;
 }
 PUBLIC int tll_islocked(tll_t *tllp)
 {
 /* Return 1 if the lock currently has any access type granted, 0 if not. */
  if (tllp->t_current == TLL_NONE) return(0);
  return(1);
 }
 PUBLIC int tll_locked_by_me(tll_t *tllp)
 {
 /* Return 1 if the calling thread is the recorded owner of the lock and that
 * ownership is not merely a pending grant (TLL_PEND); return 0 otherwise.
 */
  assert(self != NULL);
  if (tllp->t_owner != self) return(0);
  return((tllp->t_status & TLL_PEND) == 0);
 }
 PUBLIC int tll_lock(tll_t *tllp, tll_access_t locktype)
 {
 /* Try to lock three-level-lock tll with type locktype. Returns OK once the
 * lock is held with that type; when the request cannot be granted right away
 * the caller is queued and blocks in tll_append() first. Returns EBUSY
 * without changing anything if the caller already owns the lock. An invalid
 * locktype causes a panic.
 */
  assert(self != NULL);
  assert(tllp != NULL);
  assert(locktype != TLL_NONE);
  self->w_next = NULL;
  if (locktype != TLL_READ && locktype != TLL_READSER && locktype != TLL_WRITE)
 	panic("Invalid lock type %d\n", locktype);
  /* If this locking has pending locks, we wait */
  if (tllp->t_status & TLL_PEND)
 	return tll_append(tllp, locktype);
  /* If we already own this lock don't lock it again and return immediately */
  if (tllp->t_owner == self) {
 	assert(tllp->t_status == TLL_DFLT);
 	return(EBUSY);
  }
  /* If this lock is not accessed by anyone, locktype is granted off the bat */
  if (tllp->t_current == TLL_NONE) {
 	tllp->t_current = locktype;
 	if (tllp->t_current == TLL_READ)
 		tllp->t_readonly = 1;
 	else { /* Record owner if locktype is read-serialized or write-only */
 		tllp->t_owner = self;
 	}
 	return(OK);
  }
  /* If the current lock is write-only, we have to wait for that lock to be
   * released (regardless of the value of locktype). */
  if (tllp->t_current == TLL_WRITE)
 	return tll_append(tllp, locktype);
  /* However, if it's not and we're requesting a write-only lock, we have to
   * wait until the last read access is released (additional read requests
   * after this write-only requests are to be queued) */
  if (locktype == TLL_WRITE)
 	return tll_append(tllp, locktype);
  /* We have to queue read and read-serialized requests if we have a write-only
   * request queued ("write bias") or when a read-serialized lock is trying to
   * upgrade to write-only. The current lock for this tll is either read or
   * read-serialized. */
  if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
 	return tll_append(tllp, locktype);
  /* If this lock is in read-serialized mode, we can allow read requests and
   * queue read-serialized requests */
  if (tllp->t_current == TLL_READSER) {
 	if (locktype == TLL_READ) {
 		tllp->t_readonly++;
 		return(OK);
 	} else
 		return tll_append(tllp, locktype);
  }
  /* Finally, if the current lock is read-only, we can change it to
   * read-serialized if necessary without a problem. */
  tllp->t_current = locktype; /* Either read-only or read-serialized */
  if (tllp->t_current == TLL_READ) {	/* We now have an additional reader */
 	tllp->t_readonly++;
 	tllp->t_owner = NULL;
  } else {
 	assert(tllp->t_current != TLL_WRITE);
 	tllp->t_owner = self;		/* We now have a new owner */
 	self->w_next = NULL;
  }
  return(OK);
 }
 PUBLIC int tll_haspendinglock(tll_t *tllp)
 {
 /* Is someone waiting to obtain this lock? That is the case when at least
 * one of the two wait queues (write/read-only or read-serialized) has a
 * thread on it. Returns 1 if so, 0 otherwise.
 */
  assert(tllp != NULL);
  if (tllp->t_write != NULL) return(1);
  return(tllp->t_serial != NULL);
 }
 PUBLIC int tll_unlock(tll_t *tllp)
 {
 /* Unlock a previously locked three-level-lock tll. A caller that is not the
 * recorded owner is treated as a read-only holder and the reader count is
 * decremented. When the owner lets go, or the last reader leaves an ownerless
 * lock, ownership passes to the head of the write queue (only once no readers
 * remain) or else to the head of the serial queue, and that thread is woken.
 * Always returns OK.
 */
  int signal_owner = 0;
  assert(self != NULL);
  assert(tllp != NULL);
  if (tllp->t_owner == NULL || tllp->t_owner != self) {
 	/* This unlock must have been done by a read-only lock */
 	tllp->t_readonly--;
 	assert(tllp->t_readonly >= 0);
 	/* If a read-serialized lock is trying to upgrade and there are no more
 	 * read-only locks, the lock can now be upgraded to write-only */
 	if ((tllp->t_status & TLL_UPGR) && tllp->t_readonly == 0)
 		signal_owner = 1;
  }
  if(tllp->t_owner == self || (tllp->t_owner == NULL && tllp->t_readonly == 0)){
 	/* Let another read-serialized or write-only request obtain access.
 	 * Write-only has priority, but only after the last read-only access
 	 * has left. Read-serialized access will only be granted if there is
 	 * no pending write-only access request. */
 	struct worker_thread *new_owner;
 	new_owner = NULL;
 	tllp->t_owner = NULL;	/* Remove owner of lock */
 	if (tllp->t_write != NULL) {
 		if (tllp->t_readonly == 0) {
 			new_owner = tllp->t_write;
 			tllp->t_write = tllp->t_write->w_next;
 		}
 	} else if (tllp->t_serial != NULL) {
 		new_owner = tllp->t_serial;
 		tllp->t_serial = tllp->t_serial->w_next;
 	}
 	/* New owner is head of queue or NULL if no proc is available */
 	if (new_owner != NULL) {
 		tllp->t_owner = new_owner;
 		tllp->t_owner->w_next = NULL;
 		assert(tllp->t_owner != self);
 		signal_owner = 1;
 	}
  }
  /* If no one is using this lock, mark it as not in use */
  if (tllp->t_owner == NULL && tllp->t_readonly == 0)
 	tllp->t_current = TLL_NONE;
  /* An unused or read-only lock has no owner, unless we are about to wake
   * one up below */
  if (tllp->t_current == TLL_NONE || tllp->t_current == TLL_READ) {
 	if (!signal_owner) {
 		tllp->t_owner = NULL;
 	}
  }
  /* If we have a new owner or the current owner managed to upgrade its lock,
   * tell it to start/continue running */
  if (signal_owner) {
 	assert(!(tllp->t_status & TLL_PEND));
 	tllp->t_status |= TLL_PEND;
 	worker_signal(tllp->t_owner);
  }
  return(OK);
 }
 PUBLIC void tll_upgrade(tll_t *tllp)
 {
 /* Upgrade three-level-lock tll from read-serialized to write-only. The caller
 * must own the lock. If read-only holders are still present, the caller
 * flags the upgrade (TLL_UPGR) and blocks until tll_unlock() signals that
 * the last reader has left. A lock that is already write-only is left as is.
 */
  assert(self != NULL);
  assert(tllp != NULL);
  assert(tllp->t_owner == self);
  assert(tllp->t_current != TLL_READ); /* i.e., read-serialized or write-only*/
  if (tllp->t_current == TLL_WRITE) return;	/* Nothing to do */
  if (tllp->t_readonly != 0) {		/* Wait for readers to leave */
 	assert(!(tllp->t_status & TLL_UPGR));
 	tllp->t_status |= TLL_UPGR;
 	worker_wait();
 	/* Woken by tll_unlock(), which set TLL_PEND before signalling us */
 	tllp->t_status &= ~TLL_UPGR;
 	tllp->t_status &= ~TLL_PEND;
 	assert(tllp->t_readonly == 0);
  }
  tllp->t_current = TLL_WRITE;
 }
--- a/servers/avfs/tll.h
+++ b/servers/avfs/tll.h
@ -0,0 +1,20 @@
 #ifndef __VFS_TLL_H__
 #define __VFS_TLL_H__
 /* Three-level-lock. Allows read-only, read-serialized, and write-only locks */
 typedef enum { TLL_NONE, TLL_READ, TLL_READSER, TLL_WRITE } tll_access_t;
 /* Status bits; TLL_UPGR and TLL_PEND are distinct bits that are set and
 * cleared independently of each other. */
 typedef enum { TLL_DFLT = 0x0, TLL_UPGR = 0x1, TLL_PEND = 0x2 } tll_status_t;
 typedef struct {
  tll_access_t t_current;	/* Current type of access to lock */
  struct worker_thread *t_owner;/* Owner of non-read-only lock */
  signed int t_readonly;	/* No. of current read-only access */
  tll_status_t t_status;	/* Lock status; nothing (TLL_DFLT), pending
 				 * upgrade of read-serialized to write-only
 				 * (TLL_UPGR), and/or a thread has been
 				 * signalled but has not run yet (TLL_PEND) */
  struct worker_thread *t_write;/* Write/read-only access requestors queue */
  struct worker_thread *t_serial;/* Read-serialized access requestors queue */
 } tll_t;
 #endif
--- a/servers/avfs/utility.c
+++ b/servers/avfs/utility.c
@ -0,0 +1,153 @@
 /* This file contains a few general purpose utility routines.
 *
 * The entry points into this file are
 *   fetch_name:  go get a path name from user space
 *   no_sys:      reject a system call that FS does not handle
 *   isokendpt_f: check that an endpoint matches a slot in the fproc table
 *   clock_time:  ask the clock task for the real time
 *   in_group:    determines if group 'grp' is in rfp->fp_sgroups[]
 *
 * (copy, panic, conv2 and conv4 used to be listed here as well, but they are
 * not defined in this file.)
 */
 #include "fs.h"
 #include <minix/com.h>
 #include <minix/endpoint.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <assert.h>
 #include "file.h"
 #include "fproc.h"
 #include "param.h"
 #include "vmnt.h"
 /*===========================================================================*
 *				fetch_name				     *
 *===========================================================================*/
 PUBLIC int fetch_name(path, len, flag, dest)
 char *path;			/* pointer to the path in user space */
 int len;			/* path length, including 0 byte */
 int flag;			/* M3 means path may be in message */
 char *dest;			/* pointer to where path is to be stored */
 {
 /* Go get path and put it in 'dest'.
 * If 'flag' = M3 and 'len' <= M3_STRING, the path is present in 'message'.
 * If it is not, go copy it from user space.
 *
 * Returns OK on success. On failure a non-OK value is returned and err_code
 * holds the error to report to the caller: ENAMETOOLONG when 'len' exceeds
 * PATH_MAX or the name is not null-terminated within 'len' bytes, EINVAL
 * when 'len' is not positive, or the result of the failed copy.
 */
  register char *rpu, *rpm;
  int r, count;
  if (len > PATH_MAX) {
 	err_code = ENAMETOOLONG;
 	return(EGENERIC);
  }
  /* Check name length for validity. */
  if (len <= 0) {
 	err_code = EINVAL;
 	return(EGENERIC);
  }
  if (flag == M3 && len <= M3_STRING) {
 	/* Just copy the path from the message to 'dest'. */
 	rpu = &dest[0];
 	rpm = m_in.pathname;		/* contained in input message */
 	count = len;
 	do { *rpu++ = *rpm++; } while (--count);
 	r = OK;
  } else {
 	/* String is not contained in the message.  Get it from user space. */
 	r = sys_datacopy(who_e, (vir_bytes) path,
 		VFS_PROC_NR, (vir_bytes) dest, (phys_bytes) len);
 	if (r != OK) {
 		/* The copy failed, so 'dest' holds nothing we may inspect.
 		 * Callers report err_code when we fail; make sure it
 		 * reflects this failure rather than a stale value. */
 		err_code = r;
 		return(r);
 	}
  }
  /* The name must be terminated within the 'len' bytes we were given. */
  if (dest[len - 1] != '\0') {
 	err_code = ENAMETOOLONG;
 	return(EGENERIC);
  }
  return(r);
 }
 /*===========================================================================*
 *				no_sys					     *
 *===========================================================================*/
 PUBLIC int no_sys()
 {
 /* Handler for call numbers that this server does not implement: the call
 * always fails with ENOSYS.
 */
  return ENOSYS;
 }
 /*===========================================================================*
 *				isokendpt_f				     *
 *===========================================================================*/
 PUBLIC int isokendpt_f(char *file, int line, endpoint_t endpoint, int *proc, int fatal)
 {
 /* Check that 'endpoint' matches the endpoint stored in its slot of the fproc
 * table. The slot number derived from the endpoint is stored in '*proc' in
 * all cases. Each failed check is reported with printf, tagged with the
 * 'file' and 'line' of the caller. Returns OK if the endpoint checks out and
 * EDEADEPT if not; with 'fatal' set, a failure causes a panic instead.
 */
  endpoint_t known;
  int bad;
  bad = 1;	/* Cleared only if the endpoint passes every check */
  *proc = _ENDPOINT_P(endpoint);
  if (endpoint == NONE) {
 	printf("VFS %s:%d: endpoint is NONE\n", file, line);
  } else if (*proc < 0 || *proc >= NR_PROCS) {
 	printf("VFS %s:%d: proc (%d) from endpoint (%d) out of range\n",
 		file, line, *proc, endpoint);
  } else if ((known = fproc[*proc].fp_endpoint) == endpoint) {
 	bad = 0;
  } else if (known == NONE) {
 	/* The slot is not in use at all */
 	printf("VFS %s:%d: endpoint (%d) points to NONE slot (%d)\n",
 		file, line, endpoint, *proc);
 	assert(fproc[*proc].fp_pid == PID_FREE);
  } else {
 	/* The slot is in use, but with a different endpoint */
 	printf("VFS %s:%d: proc (%d) from endpoint (%d) doesn't match "
 		"known endpoint (%d)\n", file, line, *proc, endpoint,
 		fproc[*proc].fp_endpoint);
 	assert(fproc[*proc].fp_pid != PID_FREE);
  }
  if (bad && fatal)
 	panic("isokendpt_f failed");
  return(bad ? EDEADEPT : OK);
 }
 /*===========================================================================*
 *				clock_time				     *
 *===========================================================================*/
 PUBLIC time_t clock_time()
 {
 /* Return the current time in seconds since 1.1.1970, computed as the boot
 * time plus the uptime in ticks divided by system_hz. Leap seconds and the
 * like are not accounted for. Panics if the uptime cannot be obtained.
 */
  time_t boottime;
  clock_t uptime;
  int r;
  if ((r = getuptime2(&uptime, &boottime)) != OK)
 	panic("clock_time err: %d", r);
  return( (time_t) (boottime + (uptime/system_hz)));
 }
 /*===========================================================================*
 *                              in_group                                     *
 *===========================================================================*/
 PUBLIC int in_group(struct fproc *rfp, gid_t grp)
 {
 /* Search the first fp_ngroups entries of rfp->fp_sgroups[] for group 'grp'.
 * Returns OK when it is found and EINVAL when it is not.
 */
  int idx;
  idx = 0;
  while (idx < rfp->fp_ngroups) {
 	if (rfp->fp_sgroups[idx] == grp) return(OK);
 	idx++;
  }
  return(EINVAL);
 }
--- a/servers/avfs/vmnt.c
+++ b/servers/avfs/vmnt.c
@ -0,0 +1,168 @@
 /* Virtual mount table related routines.
 *
 */
 #include "fs.h"
 #include "threads.h"
 #include "vmnt.h"
 #include <assert.h>
 #include "fproc.h"
 FORWARD _PROTOTYPE( int is_vmnt_locked, (struct vmnt *vmp)		);
 /* Is vmp pointer reasonable, i.e., does it point into the vmnt table? */
 #define SANEVMP(v) ((v) >= &vmnt[0] && (v) < &vmnt[NR_MNTS])
 /* Report a bad vmp together with the source location that detected it */
 #define BADVMP(v, f, l) printf("%s:%d: bad vmp %p\n", f, l, v)
 /* vmp check that panics. Wrapped in do/while(0) so that the macro expands to
 * exactly one statement and cannot capture the 'else' of an enclosing if. */
 #define ASSERTVMP(v) do { if(!SANEVMP(v)) { \
 	BADVMP(v, __FILE__, __LINE__); panic("bad vmp"); } } while (0)
 #if LOCK_DEBUG
 /*===========================================================================*
 *				check_vmnt_locks_by_me			     *
 *===========================================================================*/
 PUBLIC void check_vmnt_locks_by_me(struct fproc *rfp)
 {
 /* Panic if the calling thread still owns the lock of any vmnt, or if the
 * vmnt read-lock counter of 'rfp' is not back at zero.
 */
  int slot;
  for (slot = 0; slot < NR_MNTS; slot++) {
 	if (!tll_locked_by_me(&vmnt[slot].m_lock)) continue;
 	panic("Thread %d still holds vmnt lock on vmp %p call_nr=%d\n",
 	      mthread_self(), &vmnt[slot], call_nr);
  }
  if (rfp->fp_vmnt_rdlocks == 0) return;
  panic("Thread %d still holds read locks on a vmnt (%d) call_nr=%d\n",
        mthread_self(), rfp->fp_vmnt_rdlocks, call_nr);
 }
 #endif
 /*===========================================================================*
 *				check_vmnt_locks			     *
 *===========================================================================*/
 PUBLIC void check_vmnt_locks()
 {
 /* Walk the vmnt table and print a line for every entry that is locked or has
 * lock requests queued. Panics if at least one such entry was found.
 */
  struct vmnt *vmp;
  int slot, nr_locked;
  nr_locked = 0;
  for (slot = 0; slot < NR_MNTS; slot++) {
 	vmp = &vmnt[slot];
 	if (!is_vmnt_locked(vmp)) continue;
 	nr_locked++;
 	printf("vmnt %p is %s, fs_e=%d dev=%d\n", vmp,
 		(tll_islocked(&vmp->m_lock) ? "locked":"pending locked"),
 		vmp->m_fs_e, vmp->m_dev);
  }
  if (nr_locked) panic("%d locked vmnts\n", nr_locked);
 #if 0
  printf("check_vmnt_locks OK\n");
 #endif
 }
 /*===========================================================================*
 *                             get_free_vmnt				     *
 *===========================================================================*/
 PUBLIC struct vmnt *get_free_vmnt(void)
 {
 /* Return the first vmnt table entry that has no device attached to it
 * (m_dev == NO_DEV), or NULL if there is no such entry.
 */
  int slot;
  for (slot = 0; slot < NR_MNTS; slot++) {
 	if (vmnt[slot].m_dev == NO_DEV)
 		return(&vmnt[slot]);
  }
  return(NULL);
 }
 /*===========================================================================*
 *                             find_vmnt				     *
 *===========================================================================*/
 PUBLIC struct vmnt *find_vmnt(endpoint_t fs_e)
 {
 /* Look up the in-use vmnt (one with a device attached) that belongs to the
 * FS with endpoint 'fs_e'. Returns NULL if there is none.
 */
  int slot;
  for (slot = 0; slot < NR_MNTS; slot++) {
 	if (vmnt[slot].m_fs_e != fs_e) continue;
 	if (vmnt[slot].m_dev != NO_DEV)
 		return(&vmnt[slot]);
  }
  return(NULL);
 }
 /*===========================================================================*
 *                             init_vmnts				     *
 *===========================================================================*/
 PUBLIC void init_vmnts(void)
 {
 /* Initialize vmnt table: mark every entry as unused, reset its request
 * bookkeeping, and initialize its lock.
 */
  struct vmnt *vmp;
  int slot;
  for (slot = 0; slot < NR_MNTS; slot++) {
 	vmp = &vmnt[slot];
 	/* No file server, no device, nothing mounted */
 	vmp->m_fs_e = NONE;
 	vmp->m_dev = NO_DEV;
 	vmp->m_flags = 0;
 	vmp->m_mounted_on = NULL;
 	vmp->m_root_node = NULL;
 	vmp->m_label[0] = '\0';
 	/* No requests in progress or queued */
 	vmp->m_comm.c_max_reqs = 1;
 	vmp->m_comm.c_cur_reqs = 0;
 	vmp->m_comm.c_req_queue = NULL;
 	tll_init(&vmp->m_lock);
  }
 }
 /*===========================================================================*
 *                             is_vmnt_locked				     *
 *===========================================================================*/
 PRIVATE int is_vmnt_locked(struct vmnt *vmp)
 {
 /* Return 1 if the lock of 'vmp' is held or has requests queued on it, and 0
 * otherwise. Panics if 'vmp' does not point into the vmnt table.
 */
  ASSERTVMP(vmp);
  if (tll_islocked(&vmp->m_lock)) return(1);
  return(tll_haspendinglock(&vmp->m_lock) ? 1 : 0);
 }
 /*===========================================================================*
 *                             lock_vmnt				     *
 *===========================================================================*/
 PUBLIC int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
 {
 /* Lock 'vmp' with access type 'locktype'. A VMNT_EXCL request is made in two
 * steps: the lock is first taken as VMNT_WRITE and then upgraded. Returns
 * EBUSY if tll_lock() reports that, and OK otherwise.
 */
  tll_access_t first_type;
  int needs_upgrade;
  ASSERTVMP(vmp);
  needs_upgrade = (locktype == VMNT_EXCL);
  first_type = needs_upgrade ? VMNT_WRITE : locktype;
  if (tll_lock(&vmp->m_lock, first_type) == EBUSY) return(EBUSY);
  if (needs_upgrade)
 	tll_upgrade(&vmp->m_lock);
 #if LOCK_DEBUG
  /* Keep track of the number of read locks this process holds */
  if (locktype == VMNT_READ)
 	fp->fp_vmnt_rdlocks += 1;
 #endif
  return(OK);
 }
 /*===========================================================================*
 *                             unlock_vmnt				     *
 *===========================================================================*/
 PUBLIC void unlock_vmnt(struct vmnt *vmp)
 {
 /* Release the caller's lock on 'vmp'. Panics if 'vmp' does not point into
 * the vmnt table.
 */
  ASSERTVMP(vmp);
 #if LOCK_DEBUG
  /* A lock we do not own must have been a read-only one (i.e., neither
   * VMNT_WRITE nor VMNT_EXCL): lower the read-only lock counter */
  if (tll_locked_by_me(&vmp->m_lock) == 0)
 	fp->fp_vmnt_rdlocks -= 1;
 #endif
  tll_unlock(&vmp->m_lock);
 #if LOCK_DEBUG
  /* Whatever we held before, we must not own the lock any longer */
  assert(!tll_locked_by_me(&vmp->m_lock));
 #endif
 }
--- a/servers/avfs/vmnt.h
+++ b/servers/avfs/vmnt.h
@ -0,0 +1,24 @@
 #ifndef __VFS_VMNT_H__
 #define __VFS_VMNT_H__
 /* Virtual mount table. An entry whose m_dev is NO_DEV is not in use. */
 EXTERN struct vmnt {
  int m_fs_e;			/* FS process' kernel endpoint */
  tll_t m_lock;			/* three-level lock; see lock_vmnt() */
  comm_t m_comm;			/* request bookkeeping: max and current number
 				 * of requests, and the request queue */
  dev_t m_dev;			/* device number */
  unsigned int m_flags;		/* mount flags */
  struct vnode *m_mounted_on;	/* vnode on which the partition is mounted */
  struct vnode *m_root_node;	/* root vnode */
  char m_label[LABEL_MAX];	/* label of the file system process */
 } vmnt[NR_MNTS];
 /* vmnt flags */
 #define VMNT_READONLY		01	/* Device mounted readonly */
 #define VMNT_BACKCALL		02	/* FS did back call */
 /* vmnt lock types mapping. Mind the shift in names: VMNT_WRITE is the
 * read-serialized level of the three-level lock, and VMNT_EXCL is its
 * write-only level. */
 #define VMNT_READ TLL_READ
 #define VMNT_WRITE TLL_READSER
 #define VMNT_EXCL TLL_WRITE
 #endif
--- a/servers/avfs/vnode.c
+++ b/servers/avfs/vnode.c
@ -0,0 +1,387 @@
 /* This file contains the routines related to vnodes.
 * The entry points are:
 *
 *  get_vnode - increase counter and get details of an inode
 *  get_free_vnode - get a pointer to a free vnode obj
 *  find_vnode - find a vnode according to the FS endpoint and the inode num.
 *  dup_vnode - duplicate vnode (i.e. increase counter)
 *  put_vnode - drop vnode (i.e. decrease counter)
 */
 #include "fs.h"
 #include "threads.h"
 #include "vnode.h"
 #include "vmnt.h"
 #include "fproc.h"
 #include "file.h"
 #include <minix/vfsif.h>
 #include <assert.h>
 /* Is vnode pointer reasonable? The checking macros below expand to nothing
 * when NDEBUG is set. Otherwise CHECKVN and ASSERTVP are wrapped in
 * do { } while (0) so that each behaves as exactly one statement and can be
 * used safely in an unbraced if/else.
 */
 #if NDEBUG
 #define SANEVP(v)
 #define CHECKVN(v)
 #define ASSERTVP(v)
 #else
 /* True when v points into the vnode table */
 #define SANEVP(v) ((v) >= &vnode[0] && (v) < &vnode[NR_VNODES])
 /* Report a bad vnode pointer together with the reporting location */
 #define BADVP(v, f, l) printf("%s:%d: bad vp %p\n", f, l, v)
 /* vp check that returns 0 for use in check_vrefs() */
 #define CHECKVN(v) do {					\
 	if (!SANEVP(v)) {				\
 		BADVP(v, __FILE__, __LINE__);		\
 		return 0;				\
 	}						\
 } while (0)
 /* vp check that panics */
 #define ASSERTVP(v) do {				\
 	if (!SANEVP(v)) {				\
 		BADVP(v, __FILE__, __LINE__);		\
 		panic("bad vp");			\
 	}						\
 } while (0)
 #endif
 #if LOCK_DEBUG
 /*===========================================================================*
 *				check_vnode_locks_by_me			     *
 *===========================================================================*/
 PUBLIC void check_vnode_locks_by_me(struct fproc *rfp)
 {
 /* Check whether this thread still has locks held on vnodes. Panics when a
 * vnode is still reported as locked by this thread, or when the read lock
 * counter of 'rfp' is not zero. Only compiled when LOCK_DEBUG is set.
 */
  struct vnode *vp;
  for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; vp++) {
 	if (tll_locked_by_me(&vp->v_lock)) {
 		/* vp is a pointer, so it is printed with %p */
 		panic("Thread %d still holds vnode lock on vp %p call_nr=%d\n",
 		      mthread_self(), (void *) vp, call_nr);
 	}
  }
  /* Read locks have no owner to test for; rely on the per-process counter */
  if (rfp->fp_vp_rdlocks != 0)
 	panic("Thread %d still holds read locks on a vnode (%d) call_nr=%d\n",
 	      mthread_self(), rfp->fp_vp_rdlocks, call_nr);
 }
 #endif
 /*===========================================================================*
 *				check_vnode_locks			     *
 *===========================================================================*/
 PUBLIC void check_vnode_locks()
 {
 /* Walk the whole vnode table and panic when at least one entry is locked
 * (as reported by is_vnode_locked()).
 */
  int slot, nr_locked;
  nr_locked = 0;
  for (slot = 0; slot < NR_VNODES; slot++) {
 	if (is_vnode_locked(&vnode[slot]))
 		nr_locked++;
  }
  if (nr_locked != 0)
 	panic("%d locked vnodes\n", nr_locked);
 #if 0
  printf("check_vnode_locks OK\n");
 #endif
 }
 /*===========================================================================*
 *				get_free_vnode				     *
 *===========================================================================*/
 PUBLIC struct vnode *get_free_vnode()
 {
 /* Return the first vnode table entry that is unreferenced and not locked,
 * after resetting part of its fields. The reference count is left untouched,
 * so the entry is not actually allocated yet. When the table is full, set
 * err_code to ENFILE and return NULL.
 */
  int slot;
  struct vnode *vp;
  for (slot = 0; slot < NR_VNODES; slot++) {
 	vp = &vnode[slot];
 	/* Skip entries that are in use or (about to be) locked */
 	if (vp->v_ref_count != 0 || is_vnode_locked(vp))
 		continue;
 	vp->v_pipe = NO_PIPE;
 	vp->v_uid  = -1;
 	vp->v_gid  = -1;
 	vp->v_sdev = NO_DEV;
 	vp->v_mapfs_e = NONE;
 	vp->v_mapfs_count = 0;
 	vp->v_mapinode_nr = 0;
 	return(vp);
  }
  err_code = ENFILE;
  return(NULL);
 }
 /*===========================================================================*
 *				find_vnode				     *
 *===========================================================================*/
 PUBLIC struct vnode *find_vnode(int fs_e, int ino)
 {
 /* Look up the in-use vnode that belongs to FS endpoint 'fs_e' and has inode
 * number 'ino'. Return NULL when the vnode table holds no such entry.
 */
  int slot;
  struct vnode *vp;
  for (slot = 0; slot < NR_VNODES; slot++) {
 	vp = &vnode[slot];
 	if (vp->v_ref_count <= 0) continue;	/* free slot */
 	if (vp->v_inode_nr != ino) continue;
 	if (vp->v_fs_e != fs_e) continue;
 	return(vp);
  }
  return(NULL);
 }
 /*===========================================================================*
 *				is_vnode_locked				     *
 *===========================================================================*/
 PUBLIC int is_vnode_locked(struct vnode *vp)
 {
 /* Return 1 when some thread holds the lock of this vnode or is waiting to
 * obtain it, and 0 otherwise.
 */
  ASSERTVP(vp);
  if (tll_islocked(&vp->v_lock))
 	return(1);
  if (tll_haspendinglock(&vp->v_lock))
 	return(1);
  return(0);
 }
 /*===========================================================================*
 *				init_vnodes				     *
 *===========================================================================*/
 PUBLIC void init_vnodes(void)
 {
 /* Put every entry of the vnode table in its initial state: no FS endpoints,
 * all counters zero, and a freshly initialized lock.
 */
  int slot;
  struct vnode *vp;
  for (slot = 0; slot < NR_VNODES; slot++) {
 	vp = &vnode[slot];
 	vp->v_fs_e = NONE;
 	vp->v_mapfs_e = NONE;
 	vp->v_inode_nr = 0;
 	vp->v_ref_count = 0;
 	vp->v_fs_count = 0;
 	vp->v_mapfs_count = 0;
 	tll_init(&vp->v_lock);
  }
 }
 /*===========================================================================*
 *				lock_vnode				     *
 *===========================================================================*/
 PUBLIC int lock_vnode(struct vnode *vp, tll_access_t locktype)
 {
 /* Lock vnode 'vp' with access type 'locktype' (VNODE_READ, VNODE_OPCL, or
 * VNODE_WRITE). Returns EBUSY when tll_lock() reports EBUSY; in that case no
 * new lock was obtained. Returns OK otherwise.
 */
  int r;
  ASSERTVP(vp);
  r = tll_lock(&vp->v_lock, locktype);
  if (r == EBUSY) return(r);
 #if LOCK_DEBUG
  /* Only count read locks that were really obtained. Counting before the
   * EBUSY check would leave the counter too high, since nothing is unlocked
   * after a refused lock request. This matches lock_vmnt(). */
  if (locktype == VNODE_READ) {
 	fp->fp_vp_rdlocks++;
  }
 #endif
  return(OK);
 }
 /*===========================================================================*
 *				unlock_vnode				     *
 *===========================================================================*/
 PUBLIC void unlock_vnode(struct vnode *vp)
 {
 /* Release the lock on vnode 'vp'. With LOCK_DEBUG set, also maintain the read
 * lock counter and verify that this thread is not queued on any vnode lock.
 */
 #if LOCK_DEBUG
  /* Only the debug code below uses these */
  int i;
  register struct vnode *rvp;
  struct worker_thread *w;
 #endif
  ASSERTVP(vp);
 #if LOCK_DEBUG
  /* Decrease read-only lock counter when not locked as VNODE_OPCL or
   * VNODE_WRITE */
  if (!tll_locked_by_me(&vp->v_lock)) {
 	fp->fp_vp_rdlocks--;
  }
  /* This thread may not appear in the write or serial queue of any vnode */
  for (i = 0; i < NR_VNODES; i++) {
 	rvp = &vnode[i];
 	w = rvp->v_lock.t_write;
 	assert(w != self);
 	while (w && w->w_next != NULL) {
 		w = w->w_next;
 		assert(w != self);
 	}
 	w = rvp->v_lock.t_serial;
 	assert(w != self);
 	while (w && w->w_next != NULL) {
 		w = w->w_next;
 		assert(w != self);
 	}
  }
 #endif
  tll_unlock(&vp->v_lock);
 }
 /*===========================================================================*
 *				dup_vnode				     *
 *===========================================================================*/
 PUBLIC void dup_vnode(struct vnode *vp)
 {
 /* Register one more user of vnode 'vp' by raising its reference counter.
 * Nothing is sent to the FS process here.
 */
  ASSERTVP(vp);
  vp->v_ref_count += 1;
 }
 /*===========================================================================*
 *				put_vnode				     *
 *===========================================================================*/
 PUBLIC void put_vnode(struct vnode *vp)
 {
 /* Decrease vnode's usage counter and decrease inode's usage counter in the
 * corresponding FS process. Decreasing the fs_count each time we decrease the
 * ref count would lead to poor performance. Instead, only decrease fs_count
 * when the ref count hits zero. However, this could lead to fs_count to wrap.
 * To prevent this, we drop the counter to 1 when the counter exceeds 256.
 * We maintain fs_count as a sanity check to make sure VFS and the FS are in
 * sync.
 *
 * Panics when the last reference is dropped while v_ref_count or v_fs_count
 * is not positive. A failing putnode request is only reported; the vnode is
 * released regardless.
 */
  int r, lock_vp;
  ASSERTVP(vp);
  /* Lock vnode. It's quite possible this thread already has a lock on this
   * vnode. That's no problem, because the reference counter will not decrease
   * to zero in that case. However, if the counter does decrease to zero *and*
   * is already locked, we have a consistency problem somewhere. */
  lock_vp = lock_vnode(vp, VNODE_OPCL);
  if (vp->v_ref_count > 1) {
 	/* Decrease counter */
 	vp->v_ref_count--;
 	/* Keep the number of references held at the FS within bounds */
 	if (vp->v_fs_count > 256)
 		vnode_clean_refs(vp);
 	/* Only undo a lock that was obtained by the call above */
 	if (lock_vp != EBUSY) unlock_vnode(vp);
 	return;
  }
  /* If we already had a lock, there is a consistency problem */
  assert(lock_vp != EBUSY);
  tll_upgrade(&vp->v_lock);	/* Make sure nobody else accesses this vnode */
  /* A vnode that's not in use can't be put back. */
  if (vp->v_ref_count <= 0)
 	panic("put_vnode failed: bad v_ref_count %d\n", vp->v_ref_count);
  /* fs_count should indicate that the file is in use. */
  if (vp->v_fs_count <= 0)
 	panic("put_vnode failed: bad v_fs_count %d\n", vp->v_fs_count);
  /* Tell FS we don't need this inode to be open anymore. */
  r = req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count);
  if (r != OK) {
 	printf("VFS: putnode failed: %d\n", r);
 	util_stacktrace();
  }
  /* This inode could've been mapped. If so, tell mapped FS to close it as
   * well. If mapped onto same FS, this putnode is not needed. The result of
   * this request is not checked. */
  if (vp->v_mapfs_e != NONE && vp->v_mapfs_e != vp->v_fs_e)
 	req_putnode(vp->v_mapfs_e, vp->v_mapinode_nr, vp->v_mapfs_count);
  /* A reference count of zero makes the slot eligible for get_free_vnode() */
  vp->v_fs_count = 0;
  vp->v_ref_count = 0;
  vp->v_mapfs_count = 0;
  unlock_vnode(vp);
 }
 /*===========================================================================*
 *				vnode_clean_refs			     *
 *===========================================================================*/
 PUBLIC void vnode_clean_refs(struct vnode *vp)
 {
 /* Have the underlying FS give up all of its references to this vnode's inode
 * except one. A NULL vnode, or one with at most one FS reference, is left
 * alone.
 */
  if (vp != NULL && vp->v_fs_count > 1) {
 	/* Hand back the surplus and remember that a single one remains */
 	req_putnode(vp->v_fs_e, vp->v_inode_nr, vp->v_fs_count - 1);
 	vp->v_fs_count = 1;
  }
 }
 /* Count one reference to vnode v. This macro assigns to a variable named 'vp'
 * in the calling function and, through CHECKVN, makes that function return 0
 * when v does not point into the vnode table.
 */
 #define REFVP(v) { vp = (v); CHECKVN(v); vp->v_ref_check++; }
 #if DO_SANITYCHECKS
 /*===========================================================================*
 *				check_vrefs				     *
 *===========================================================================*/
 PUBLIC int check_vrefs()
 {
 /* Recount the references to each vnode from the process table (root and
  * working directories), the filp table, and the mount table, and compare the
  * result with v_ref_count. Also check that v_pipe agrees with the file type
  * in v_mode. Returns 1 when everything is consistent and 0 otherwise.
  * Only compiled when DO_SANITYCHECKS is set.
  *
  * NOTE(review): this code uses v_ref_check, v_file and v_line. In vnode.h
  * v_ref_check is compiled out with #if 0 and the other two are not declared,
  * so this presumably does not build with DO_SANITYCHECKS enabled - confirm.
  */
 	int i, bad;
 	int ispipe_flag, ispipe_mode;
 	struct vnode *vp;
 	struct vmnt *vmp;
 	struct fproc *rfp;
 	struct filp *f;
 	/* Clear v_ref_check */
 	for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
 		vp->v_ref_check= 0;
 	/* Count reference for processes */
 	for (rfp=&fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 		if (rfp->fp_pid == PID_FREE)
 			continue;
 		if(rfp->fp_rd) REFVP(rfp->fp_rd);
                if(rfp->fp_wd) REFVP(rfp->fp_wd);
 	}
 	/* Count references from filedescriptors */
 	for (f = &filp[0]; f < &filp[NR_FILPS]; f++)
 	{
 		if (f->filp_count == 0)
 			continue;
 		REFVP(f->filp_vno);
 	}
 	/* Count references to mount points */
 	for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp)
 	{
 		if (vmp->m_dev == NO_DEV)
 			continue;
 		REFVP(vmp->m_root_node);
 		if(vmp->m_mounted_on)
 			REFVP(vmp->m_mounted_on);
 	}
 	/* Check references */
 	bad= 0;
 	for (vp = &vnode[0]; vp < &vnode[NR_VNODES]; ++vp)
 	{
 		/* The recount must match the maintained counter */
 		if (vp->v_ref_count != vp->v_ref_check)
 		{
 			printf(
 "Bad reference count for inode %d on device 0x%x: found %d, listed %d\n",
 				vp->v_inode_nr, vp->v_dev, vp->v_ref_check,
 				vp->v_ref_count);
 			printf("last marked at %s, %d\n",
 				vp->v_file, vp->v_line);
 			bad= 1;
 		}
 		/* Also check v_pipe */
 		if (vp->v_ref_count != 0)
 		{
 			ispipe_flag= (vp->v_pipe == I_PIPE);
 			ispipe_mode= ((vp->v_mode & I_TYPE) == I_NAMED_PIPE);
 			if (ispipe_flag != ispipe_mode)
 			{
 				printf(
 "Bad v_pipe for inode %d on device 0x%x: found %d, mode 0%o\n",
 				vp->v_inode_nr, vp->v_dev, vp->v_pipe,
 				vp->v_mode);
 				printf("last marked at %s, %d\n",
 					vp->v_file, vp->v_line);
 				bad= 1;
 			}
 		}
 	}
 	return !bad;
 }
 #endif
--- a/servers/avfs/vnode.h
+++ b/servers/avfs/vnode.h
@ -0,0 +1,40 @@
 #ifndef __VFS_VNODE_H__
 #define __VFS_VNODE_H__
 /* Table of vnodes; there is room for NR_VNODES entries. An entry with a
 * v_ref_count of zero is free.
 */
 EXTERN struct vnode {
  endpoint_t v_fs_e;            /* FS process' endpoint number */
  endpoint_t v_mapfs_e;		/* mapped FS process' endpoint number */
  ino_t v_inode_nr;		/* inode number on its (minor) device */
  ino_t v_mapinode_nr;		/* mapped inode number of mapped FS. */
  mode_t v_mode;		/* file type, protection, etc. */
  uid_t v_uid;			/* uid of inode. */
  gid_t v_gid;			/* gid of inode. */
  off_t v_size;			/* current file size in bytes */
  int v_ref_count;		/* # times vnode used; 0 means slot is free */
  int v_fs_count;		/* # reference at the underlying FS */
  int v_mapfs_count;		/* # reference at the underlying mapped FS */
 #if 0
  /* Compiled out, although check_vrefs() in vnode.c refers to this field */
  int v_ref_check;		/* for consistency checks */
 #endif
  char v_pipe;			/* set to I_PIPE if pipe */
  off_t v_pipe_rd_pos;		/* presumably the pipe's read position;
 				 * TODO confirm */
  off_t v_pipe_wr_pos;		/* presumably the pipe's write position;
 				 * TODO confirm */
  endpoint_t v_bfs_e;		/* endpoint number for the FS process in case
 				   of a block special file */
  dev_t v_dev;                  /* device number on which the corresponding
                                   inode resides */
  dev_t v_sdev;                 /* device number for special files */
  struct vmnt *v_vmnt;          /* vmnt object of the partition */
  tll_t v_lock;			/* three-level-lock */
 } vnode[NR_VNODES];
 /* Field values. */
 #define NO_PIPE            0	/* v_pipe is NO_PIPE if inode is not a pipe */
 #define I_PIPE             1	/* v_pipe is I_PIPE if inode is a pipe */
 /* vnode lock types mapping; used with lock_vnode() */
 #define VNODE_READ TLL_READ
 #define VNODE_OPCL TLL_READSER
 #define VNODE_WRITE TLL_WRITE
 #endif
--- a/servers/avfs/worker.c
+++ b/servers/avfs/worker.c
@ -0,0 +1,336 @@
 #include "fs.h"
 #include "glo.h"
 #include "fproc.h"
 #include "threads.h"
 #include "job.h"
 #include <assert.h>
 /* Prototypes of the functions that are private to this file */
 FORWARD _PROTOTYPE( void append_job, (struct job *job,
 					void *(*func)(void *arg))	);
 FORWARD _PROTOTYPE( void get_work, (struct worker_thread *worker)	);
 FORWARD _PROTOTYPE( void *worker_main, (void *arg)			);
 FORWARD _PROTOTYPE( void worker_sleep, (struct worker_thread *worker)	);
 FORWARD _PROTOTYPE( void worker_wake, (struct worker_thread *worker)	);
 /* Set once worker_init() has carried out its one-time initialization */
 PRIVATE int init = 0;
 /* Thread attributes that worker_init() passes to mthread_create() for every
 * worker; configured with TH_STACKSIZE and MTHREAD_CREATE_DETACHED.
 */
 PRIVATE mthread_attr_t tattr;
 /* Stack size given to mthread_attr_setstacksize(); builds with MKCOVERAGE
 * defined get a larger stack.
 */
 #ifdef MKCOVERAGE
 # define TH_STACKSIZE (10 * 1024)
 #else
 # define TH_STACKSIZE (6 * 1024)
 #endif
 /* Assert that w is the system worker, the deadlock resolving worker, or an
 * entry of the workers table. The macro deliberately has no trailing
 * semicolon; the caller supplies it, which keeps 'if (c) ASSERTW(w); else ...'
 * valid.
 */
 #define ASSERTW(w) assert((w) == &sys_worker || (w) == &dl_worker || \
 		   ((w) >= &workers[0] && (w) < &workers[NR_WTHREADS]))
 /*===========================================================================*
 *				worker_init				     *
 *===========================================================================*/
 PUBLIC void worker_init(struct worker_thread *worker)
 {
 /* Initialize worker thread 'worker' and start it. The first call also sets up
 * the thread library and the thread attributes shared by all workers. Every
 * failure is fatal. The library calls are made outside of assert() on
 * purpose: assert() expands to nothing when NDEBUG is defined, which would
 * silently drop the calls themselves.
 */
  if (!init) {
 	threads_init();
 	if (mthread_attr_init(&tattr) != 0)
 		panic("failed to initialize attribute");
 	if (mthread_attr_setstacksize(&tattr, TH_STACKSIZE) != 0)
 		panic("couldn't set default thread stack size");
 	if (mthread_attr_setdetachstate(&tattr, MTHREAD_CREATE_DETACHED) != 0)
 		panic("couldn't set default thread detach state");
 	pending = 0;
 	init = 1;
  }
  ASSERTW(worker);
  worker->w_job.j_func = NULL;		/* Mark not in use */
  worker->w_next = NULL;
  if (mutex_init(&worker->w_event_mutex, NULL) != 0)
 	panic("failed to initialize mutex");
  if (cond_init(&worker->w_event, NULL) != 0)
 	panic("failed to initialize conditional variable");
  if (mthread_create(&worker->w_tid, &tattr, worker_main, (void *) worker) != 0)
 	panic("unable to start thread");
  /* NOTE(review): presumably gives the new thread a chance to run - confirm */
  yield();
 }
 /*===========================================================================*
 *				get_work				     *
 *===========================================================================*/
 PRIVATE void get_work(struct worker_thread *worker)
 {
 /* Obtain the next job for 'worker'. A job queued behind the worker's current
 * job comes first. Otherwise a regular worker (not the system or deadlock
 * resolving worker) takes over a job that is pending in the process table.
 * When there is nothing to do, sleep until woken up.
 */
  struct job *queued;
  struct fproc *rfp;
  int is_regular;
  ASSERTW(worker);
  self = worker;
  /* Queued jobs were allocated by append_job(); copy and release */
  queued = worker->w_job.j_next;
  if (queued != NULL) {
 	worker->w_job = *queued;
 	free(queued);
 	return;
  }
  is_regular = (worker != &sys_worker && worker != &dl_worker);
  if (is_regular && pending > 0) {
 	/* Some process must have its pending flag set */
 	for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 		if (!(rfp->fp_flags & FP_PENDING))
 			continue;
 		worker->w_job = rfp->fp_job;
 		rfp->fp_job.j_func = NULL;
 		rfp->fp_flags &= ~FP_PENDING; /* No longer pending */
 		pending--;
 		assert(pending >= 0);
 		return;
 	}
 	panic("Pending work inconsistency");
  }
  /* Nothing queued and nothing pending: wait for work */
  worker_sleep(worker);
 }
 /*===========================================================================*
 *				worker_available				     *
 *===========================================================================*/
 PUBLIC int worker_available(void)
 {
  int busy, i;
  busy = 0;
  for (i = 0; i < NR_WTHREADS; i++) {
 	if (workers[i].w_job.j_func != NULL)
 		busy++;
  }
  return(NR_WTHREADS - busy);
 }
 /*===========================================================================*
 *				worker_main				     *
 *===========================================================================*/
 PRIVATE void *worker_main(void *arg)
 {
 /* Worker thread main loop */
  struct worker_thread *me;
  me = (struct worker_thread *) arg;
  ASSERTW(me);
  while(TRUE) {
 	get_work(me);
 	/* Register ourselves in fproc table if possible */
 	if (me->w_job.j_fp != NULL) {
 		me->w_job.j_fp->fp_wtid = me->w_tid;
 	}
 	/* Carry out work */
 	me->w_job.j_func(&me->w_job);
 	/* Mark ourselves as done */
 	me->w_job.j_func = NULL;
  }
  return(NULL);	/* Unreachable */
 }
 /*===========================================================================*
 *				dl_worker_start				     *
 *===========================================================================*/
 PUBLIC void dl_worker_start(void *(*func)(void *arg))
 {
 /* Start the deadlock resolving worker. This worker is reserved to run in case
 * all other workers are busy and we have to have an additional worker to come
 * to the rescue. */
  assert(dl_worker.w_job.j_func == NULL);
  if (dl_worker.w_job.j_func == NULL) {
 	dl_worker.w_job.j_fp = fp;
 	dl_worker.w_job.j_m_in = m_in;
 	dl_worker.w_job.j_func = func;
 	worker_wake(&dl_worker);
  }
 }
 /*===========================================================================*
 *				sys_worker_start			     *
 *===========================================================================*/
 PUBLIC void sys_worker_start(void *(*func)(void *arg))
 {
 /* Carry out work for the system (i.e., kernel or PM). If this thread is idle
 * do it right away, else create new job and append it to the queue. */
  if (sys_worker.w_job.j_func == NULL) {
 	sys_worker.w_job.j_fp = fp;
 	sys_worker.w_job.j_m_in = m_in;
 	sys_worker.w_job.j_func = func;
 	worker_wake(&sys_worker);
  } else {
 	append_job(&sys_worker.w_job, func);
  }
 }
 /*===========================================================================*
 *				append_job				     *
 *===========================================================================*/
 PRIVATE void append_job(struct job *job, void *(*func)(void *arg))
 {
 /* Create a job for 'func' on behalf of the current process and message (fp
 * and m_in), and append it to the job queue that starts at 'job'. The new job
 * is heap allocated; get_work() frees it once it has been taken off the
 * queue. Running out of memory is fatal.
 */
  struct job *new_job, *tail;
  /* Create new job. The check does not rely on assert(), which would vanish
   * when NDEBUG is defined and leave a NULL pointer dereference behind. */
  new_job = calloc(1, sizeof(*new_job));
  if (new_job == NULL)
 	panic("unable to allocate memory for job");
  new_job->j_fp = fp;
  new_job->j_m_in = m_in;
  new_job->j_func = func;
  new_job->j_next = NULL;
  /* Append to queue */
  tail = job;
  while (tail->j_next != NULL) tail = tail->j_next;
  tail->j_next = new_job;
 }
 /*===========================================================================*
 *				worker_start				     *
 *===========================================================================*/
 PUBLIC void worker_start(void *(*func)(void *arg))
 {
 /* Find an available worker or wait for one */
  int i;
  struct worker_thread *worker;
  worker = NULL;
  for (i = 0; i < NR_WTHREADS; i++) {
 	if (workers[i].w_job.j_func == NULL) {
 		worker = &workers[i];
 		break;
 	}
  }
  if (worker != NULL) {
 	worker->w_job.j_fp = fp;
 	worker->w_job.j_m_in = m_in;
 	worker->w_job.j_func = func;
 	worker->w_job.j_next = NULL;
 	worker_wake(worker);
 	return;
  }
  /* No worker threads available, let's wait for one to finish. */
  /* If this process already has a job scheduled, forget about this new
   * job;
   *  - the new job is do_dummy and we have already scheduled an actual job
   *  - the new job is an actual job and we have already scheduled do_dummy in
   *    order to exit this proc, so doing the new job is pointless. */
  if (fp->fp_job.j_func == NULL) {
 	assert(!(fp->fp_flags & FP_PENDING));
 	fp->fp_job.j_fp = fp;
 	fp->fp_job.j_m_in = m_in;
 	fp->fp_job.j_func = func;
 	fp->fp_job.j_next = NULL;
 	fp->fp_flags |= FP_PENDING;
 	pending++;
  }
 }
 /*===========================================================================*
 *				worker_sleep				     *
 *===========================================================================*/
 PRIVATE void worker_sleep(struct worker_thread *worker)
 {
 /* Block the calling worker thread on its event condition variable until it is
 * signalled. 'worker' must be the calling thread's own structure. The locking
 * calls are made outside of assert(), because assert() expands to nothing
 * when NDEBUG is defined and the thread would then never block. Failures are
 * fatal.
 */
  ASSERTW(worker);
  assert(self == worker);
  if (mutex_lock(&worker->w_event_mutex) != 0)
 	panic("unable to lock event mutex");
  if (cond_wait(&worker->w_event, &worker->w_event_mutex) != 0)
 	panic("could not wait on conditional variable");
  if (mutex_unlock(&worker->w_event_mutex) != 0)
 	panic("unable to unlock event mutex");
  /* Other threads ran in the meantime; make 'self' refer to us again */
  self = worker;
 }
 /*===========================================================================*
 *				worker_wake				     *
 *===========================================================================*/
 PRIVATE void worker_wake(struct worker_thread *worker)
 {
 /* Signal a worker to wake up. The locking and signalling calls are made
 * outside of assert(), because assert() expands to nothing when NDEBUG is
 * defined and the worker would then never be woken. Failures are fatal.
 */
  ASSERTW(worker);
  if (mutex_lock(&worker->w_event_mutex) != 0)
 	panic("unable to lock event mutex");
  if (cond_signal(&worker->w_event) != 0)
 	panic("unable to signal conditional variable");
  if (mutex_unlock(&worker->w_event_mutex) != 0)
 	panic("unable to unlock event mutex");
 }
 /*===========================================================================*
 *				worker_wait				     *
 *===========================================================================*/
 PUBLIC void worker_wait(void)
 {
  struct worker_thread *worker;
  worker = worker_self();
  worker->w_job.j_m_in = m_in;	/* Store important global data */
  assert(fp == worker->w_job.j_fp);
  worker_sleep(worker);
  /* We continue here after waking up */
  fp = worker->w_job.j_fp;	/* Restore global data */
  m_in = worker->w_job.j_m_in;
  assert(worker->w_next == NULL);
 }
 /*===========================================================================*
 *				worker_signal				     *
 *===========================================================================*/
 PUBLIC void worker_signal(struct worker_thread *worker)
 {
 /* Public entry point to wake up 'worker'; after validating the pointer it
 * hands over to worker_wake().
 */
  ASSERTW(worker);		/* Make sure we have a valid thread */
  worker_wake(worker);
 }
 /*===========================================================================*
 *				worker_self				     *
 *===========================================================================*/
 PUBLIC struct worker_thread *worker_self(void)
 {
 /* Return the worker structure of the calling thread. The calling thread is
 * asserted to be one of the known workers.
 */
  struct worker_thread *me;
  me = worker_get(mthread_self());
  assert(me != NULL);
  return(me);
 }
 /*===========================================================================*
 *				worker_get				     *
 *===========================================================================*/
 PUBLIC struct worker_thread *worker_get(thread_t worker_tid)
 {
 /* Map thread id 'worker_tid' onto its worker structure. The system worker
 * and the deadlock resolving worker are tried first, then the workers table.
 * Return NULL when no worker has this thread id.
 */
  int slot;
  if (worker_tid == sys_worker.w_tid)
 	return(&sys_worker);
  if (worker_tid == dl_worker.w_tid)
 	return(&dl_worker);
  for (slot = 0; slot < NR_WTHREADS; slot++) {
 	if (workers[slot].w_tid == worker_tid)
 		return(&workers[slot]);
  }
  return(NULL);
 }
 /*===========================================================================*
 *				worker_getjob				     *
 *===========================================================================*/
 PUBLIC struct job *worker_getjob(thread_t worker_tid)
 {
 /* Return a pointer to the job of the worker with thread id 'worker_tid', or
 * NULL when there is no such worker.
 */
  struct worker_thread *owner;
  owner = worker_get(worker_tid);
  if (owner == NULL)
 	return(NULL);
  return(&owner->w_job);
 }
--- a/servers/avfs/write.c
+++ b/servers/avfs/write.c
@ -0,0 +1,19 @@
 /* This file is the counterpart of "read.c".  It contains the code for writing
 * insofar as this is not contained in read_write().
 *
 * The entry points into this file are
 *   do_write:     call read_write to perform the WRITE system call
 */
 #include "fs.h"
 #include "file.h"
 /*===========================================================================*
 *				do_write				     *
 *===========================================================================*/
 PUBLIC int do_write()
 {
 /* Handle the write(fd, buffer, nbytes) system call. All of the work is done
 * by read_write(), which is told to write; its result is passed on as is.
 */
  int r;
  r = read_write(WRITING);
  return(r);
 }
--- a/servers/is/Makefile
+++ b/servers/is/Makefile
@ -1,4 +1,8 @@
 # Makefile for Information Server (IS)
 #
 .include <bsd.own.mk>
 PROG=	is
 SRCS=	main.c dmp.c dmp_kernel.c dmp_pm.c dmp_fs.c dmp_rs.c dmp_ds.c dmp_vm.c
@ -13,4 +17,8 @@ CPPFLAGS.dmp_kernel.c+=	-I${MINIXSRCDIR}
 CPPFLAGS.dmp_rs.c+=	-I${MINIXSRCDIR}
 CPPFLAGS.dmp_vm.c+=	-I${MINIXSRCDIR}
 .if ${BUILDAVFS} == "yes"
 CFLAGS+= -D_USEAVFS
 .endif
 .include <minix.service.mk>
--- a/servers/is/dmp_fs.c
+++ b/servers/is/dmp_fs.c
@ -10,9 +10,15 @@
 #include "inc.h"
 #include "../mfs/const.h"
-#include "../vfs/const.h"
+#if defined(_USEAVFS)
-#include "../vfs/fproc.h"
+# include "../avfs/const.h"
-#include "../vfs/dmap.h"
+# include "../avfs/fproc.h"
 # include "../avfs/dmap.h"
 #else
 # include "../vfs/const.h"
 # include "../vfs/fproc.h"
 # include "../vfs/dmap.h"
 #endif
 #include <minix/dmap.h>
 PUBLIC struct fproc fproc[NR_PROCS];
@ -35,6 +41,7 @@ PUBLIC void fproc_dmp()
  	fp = &fproc[i];
  	if (fp->fp_pid <= 0) continue;
  	if (++n > 22) break;
 #if defined(_USEVFS)
  	printf("%3d  %4d  %2d/%d  0x%05x %2d (%2d) %2d (%2d) %3d   %3d %3d ",
  		i, fp->fp_pid, 
  		((fp->fp_tty>>MAJOR)&BYTE), ((fp->fp_tty>>MINOR)&BYTE), 
@ -43,6 +50,16 @@ PUBLIC void fproc_dmp()
  		fp->fp_sesldr,
  		fp->fp_blocked_on, !!fp->fp_revived
  	);
 #else
 	printf("%3d  %4d  %2d/%d  0x%05x %2d (%2d) %2d (%2d) %3d   %3d %3d ",
 		i, fp->fp_pid,
 		major(fp->fp_tty), minor(fp->fp_tty),
 		fp->fp_umask,
 		fp->fp_realuid, fp->fp_effuid, fp->fp_realgid, fp->fp_effgid,
 		!!(fp->fp_flags & FP_SESLDR),
 		fp->fp_blocked_on, !!(fp->fp_flags & FP_REVIVED)
 	);
 #endif
 	if (fp->fp_blocked_on == FP_BLOCKED_ON_OTHER)
 		printf("%4d\n", fp->fp_task);
 	else
--- a/servers/procfs/Makefile
+++ b/servers/procfs/Makefile
@ -1,9 +1,17 @@
 # Makefile for ProcFS server
 #
 .include <bsd.own.mk>
 PROG=	procfs
 SRCS=	buf.c main.c pid.c root.c tree.c util.c cpuinfo.c
 CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
 .if ${BUILDAVFS} == "yes"
 CFLAGS+= -D_USEAVFS
 .endif
 DPADD+=	${LIBVTREEFS} ${LIBSYS}
 LDADD+=	-lvtreefs -lsys
--- a/servers/procfs/inc.h
+++ b/servers/procfs/inc.h
@ -50,8 +50,13 @@
 #include "kernel/type.h"
 #include "kernel/proc.h"
 #include "pm/mproc.h"
-#include "vfs/const.h"
+#if defined(_USEAVFS)
-#include "vfs/fproc.h"
+# include "avfs/const.h"
 # include "avfs/fproc.h"
 #else
 # include "vfs/const.h"
 # include "vfs/fproc.h"
 #endif
 #include <minix/vtreefs.h>
 #include <minix/procfs.h>
--- a/share/mk/bsd.own.mk
+++ b/share/mk/bsd.own.mk
@ -12,6 +12,8 @@ SMP_FLAGS += -DCONFIG_MAX_CPUS=${CONFIG_MAX_CPUS}
 CPPFLAGS+= ${SMP_FLAGS}
 BUILDAVFS?= "no"
 MAKECONF?=	/etc/make.conf
 .-include "${MAKECONF}"
--- a/tools/Makefile
+++ b/tools/Makefile
@ -9,19 +9,26 @@ MDEC=	/usr/mdec
 GEN_FILES=	*.bak image kernel *.iso *.iso.gz cdfdimage rootimage src
 # Specify the programs that are part of the system image.
 .if ${BUILDAVFS} == "yes"
 VFS= "../servers/avfs/vfs"
 PFS= "../servers/apfs/pfs"
 .else
 VFS= "../servers/vfs/vfs"
 PFS= "../servers/pfs/pfs"
 .endif
 KERNEL= kernel
 PROGRAMS= \
 	../servers/ds/ds \
 	../servers/rs/rs \
 	../servers/pm/pm \
 	../servers/sched/sched \
-	../servers/vfs/vfs \
+	${VFS} \
 	../drivers/memory/memory \
 	../drivers/log/log \
 	../drivers/tty/tty \
 	../servers/mfs/mfs \
 	../servers/vm/vm \
-	../servers/pfs/pfs \
+	${PFS} \
 	../servers/init/init
 usage: