From 33a7ac7557f90652c7a96ae97d911ef129677240 Mon Sep 17 00:00:00 2001 From: Ben Gras Date: Tue, 7 May 2013 12:41:07 +0000 Subject: [PATCH] vfs: mmap support . libc: add vfs_mmap, a way for vfs to initiate mmap()s. This is a good special case to have as vfs is a slightly different client from regular user processes. It doesn't do it for itself, and has the dev & inode info already so the callback to VFS for the lookup isn't necessary. So it has different info to pass to VM. . libc: also add minix_mmap64() that accepts a 64-bit offset, even though our off_t is still 32 bit now. . At exec() time, try to mmap() in the executable if available. (It is not yet available in this commit.) . To support mmap(), add do_vm_call that allows VM to look up (to ino+dev), do i/o on, and close FDs on behalf of other processes. Change-Id: I831551e45a6781c74313c450eb9c967a68505932 --- commands/service/parse.c | 1 + etc/system.conf | 3 +- include/minix/callnr.h | 4 +- include/minix/com.h | 9 ++- include/minix/vm.h | 8 ++ lib/libc/sys-minix/mmap.c | 26 ++++++ lib/libexec/exec_elf.c | 2 - servers/pm/table.c | 1 + servers/vfs/coredump.c | 2 +- servers/vfs/exec.c | 69 +++++++++++++++- servers/vfs/filedes.c | 4 +- servers/vfs/misc.c | 165 +++++++++++++++++++++++++++++++++++++- servers/vfs/open.c | 65 +++++++++------ servers/vfs/pipe.c | 4 +- servers/vfs/proto.h | 17 ++-- servers/vfs/read.c | 91 +++++++++++++-------- servers/vfs/table.c | 1 + servers/vfs/utility.c | 2 - sys/sys/mman.h | 1 + 19 files changed, 395 insertions(+), 80 deletions(-) diff --git a/commands/service/parse.c b/commands/service/parse.c index ea805a073..2808da172 100644 --- a/commands/service/parse.c +++ b/commands/service/parse.c @@ -734,6 +734,7 @@ struct { "PROCCTL", VM_PROCCTL }, { "MAPCACHEPAGE", VM_MAPCACHEPAGE }, { "SETCACHEPAGE", VM_SETCACHEPAGE }, + { "VFS_MMAP", VM_VFS_MMAP }, { NULL, 0 }, }; diff --git a/etc/system.conf b/etc/system.conf index 9d9e8badb..944b5ccba 100644 --- a/etc/system.conf +++ 
b/etc/system.conf @@ -94,7 +94,8 @@ service vfs VIRCOPY # 15 MEMSET ; - vm PROCCTL; + vm PROCCTL + VFS_MMAP; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS diff --git a/include/minix/callnr.h b/include/minix/callnr.h index e38d881d8..0ec74931b 100644 --- a/include/minix/callnr.h +++ b/include/minix/callnr.h @@ -1,4 +1,4 @@ -#define NCALLS 117 /* number of system calls allowed */ +#define NCALLS 118 /* number of system calls allowed */ /* In case it isn't obvious enough: this list is sorted numerically. */ #define EXIT 1 @@ -107,6 +107,8 @@ #define CLOCK_GETTIME 115 /* clock_gettime() */ #define CLOCK_SETTIME 116 /* clock_settime() */ +#define VFS_VMCALL 117 + #define TASK_REPLY 121 /* to VFS: reply code from drivers, not * really a standalone call. */ diff --git a/include/minix/com.h b/include/minix/com.h index 8af307b9f..08496d220 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -1005,6 +1005,7 @@ # define VMV_DEV m10_i4 # define VMV_INO m10_l1 # define VMV_FD m10_l2 +# define VMV_SIZE_PAGES m10_l3 #define VM_REMAP (VM_RQ_BASE+33) # define VMRE_D m1_i1 @@ -1075,8 +1076,10 @@ #define VMPPARAM_CLEAR 1 /* values for VMPCTL_PARAM */ +#define VM_VFS_MMAP (VM_RQ_BASE+46) + /* Total. */ -#define NR_VM_CALLS 46 +#define NR_VM_CALLS 47 #define VM_CALL_MASK_SIZE BITMAP_CHUNKS(NR_VM_CALLS) /* not handled as a normal VM call, thus at the end of the reserved rage */ @@ -1086,8 +1089,8 @@ /* Basic vm calls allowed to every process. 
*/ #define VM_BASIC_CALLS \ - VM_MMAP, VM_MUNMAP, VM_MAP_PHYS, VM_UNMAP_PHYS, \ - VM_INFO, VM_MAPCACHEPAGE + VM_MMAP, VM_VFS_REPLY, VM_MUNMAP, VM_MAP_PHYS, VM_UNMAP_PHYS, \ + VM_INFO /*===========================================================================* * Messages for IPC server * diff --git a/include/minix/vm.h b/include/minix/vm.h index 698905954..6ed245e08 100644 --- a/include/minix/vm.h +++ b/include/minix/vm.h @@ -26,6 +26,14 @@ int vm_forgetblock(u64_t id); void vm_forgetblocks(void); int vm_yield_block_get_block(u64_t yieldid, u64_t getid, void *mem, vir_bytes len); +int minix_vfs_mmap(endpoint_t who, u32_t offset, u32_t len, + u32_t dev, u32_t ino, u16_t fd, u32_t vaddr, u16_t clearend, u16_t + flags); + +/* minix vfs mmap flags */ +#define MVM_LENMASK 0x0FFF +#define MVM_FLAGSMASK 0xF000 +#define MVM_WRITABLE 0x8000 /* Invalid ID with special meaning for the vm_yield_block_get_block * interface. diff --git a/lib/libc/sys-minix/mmap.c b/lib/libc/sys-minix/mmap.c index 4a3de773e..f213f8fbf 100644 --- a/lib/libc/sys-minix/mmap.c +++ b/lib/libc/sys-minix/mmap.c @@ -51,12 +51,38 @@ void *minix_mmap_for(endpoint_t forwhom, return (void *) m.VMM_RETADDR; } +int minix_vfs_mmap(endpoint_t who, u32_t offset, u32_t len, + u32_t dev, u32_t ino, u16_t fd, u32_t vaddr, u16_t clearend, + u16_t flags) +{ + message m; + + memset(&m, 0, sizeof(message)); + + m.m_u.m_vm_vfs.who = who; + m.m_u.m_vm_vfs.offset = offset; + m.m_u.m_vm_vfs.dev = dev; + m.m_u.m_vm_vfs.ino = ino; + m.m_u.m_vm_vfs.vaddr = vaddr; + m.m_u.m_vm_vfs.len = len; + m.m_u.m_vm_vfs.fd = fd; + m.m_u.m_vm_vfs.clearend_and_flags = clearend | flags; + + return _syscall(VM_PROC_NR, VM_VFS_MMAP, &m); +} + void *minix_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { return minix_mmap_for(SELF, addr, len, prot, flags, fd, offset); } +void *minix_mmap64(void *addr, size_t len, int prot, int flags, + int fd, u64_t offset) +{ + return minix_mmap_for(SELF, addr, len, prot, flags, fd, 
offset); +} + int minix_munmap(void *addr, size_t len) { message m; diff --git a/lib/libexec/exec_elf.c b/lib/libexec/exec_elf.c index 24bfed966..06c1bfd7d 100644 --- a/lib/libexec/exec_elf.c +++ b/lib/libexec/exec_elf.c @@ -249,8 +249,6 @@ int libexec_load_elf(struct exec_info *execi) #endif } } else { - if(try_mmap) printf("libexec: mmap failed\n"); - /* make us some memory */ if(execi->allocmem_prealloc_junk(execi, vaddr, seg_membytes) != OK) { if(execi->clearproc) execi->clearproc(execi); diff --git a/servers/pm/table.c b/servers/pm/table.c index 258ed1501..d1c2075a1 100644 --- a/servers/pm/table.c +++ b/servers/pm/table.c @@ -128,6 +128,7 @@ int (*call_vec[])(void) = { do_getres, /* 114 = clock_getres */ do_gettime, /* 115 = clock_gettime */ do_settime, /* 116 = clock_settime */ + no_sys, /* 117 = (vmcall) */ }; /* This should not fail with "array size is negative": */ extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 1 : -1]; diff --git a/servers/vfs/coredump.c b/servers/vfs/coredump.c index 9b5d24573..578d43df5 100644 --- a/servers/vfs/coredump.c +++ b/servers/vfs/coredump.c @@ -177,7 +177,7 @@ static void adjust_offsets(Elf_Phdr phdrs[], int phnum) *===========================================================================*/ static void write_buf(struct filp *f, char *buf, size_t size) { - read_write(WRITING, f, buf, size, VFS_PROC_NR); + read_write(fp, WRITING, f, buf, size, VFS_PROC_NR); } /*===========================================================================* diff --git a/servers/vfs/exec.c b/servers/vfs/exec.c index ce00e56c8..9e9b6d096 100644 --- a/servers/vfs/exec.c +++ b/servers/vfs/exec.c @@ -16,6 +16,7 @@ #include "fs.h" #include +#include #include #include #include @@ -30,6 +31,7 @@ #include "path.h" #include "param.h" #include "vnode.h" +#include "file.h" #include #include #include @@ -48,6 +50,8 @@ struct vfs_exec_info { int is_dyn; /* Dynamically linked executable */ int elf_main_fd; /* Dyn: FD of main program 
execuatble */ char execname[PATH_MAX]; /* Full executable invocation */ + int vmfd; + int vmfd_used; }; static void lock_exec(void); @@ -185,6 +189,27 @@ static int get_read_vp(struct vfs_exec_info *execi, r=get_read_vp(&e,f,p,s,rs,fp); if(r != OK) { FAILCHECK(r); } \ } while(0) +static int vfs_memmap(struct exec_info *execi, + vir_bytes vaddr, vir_bytes len, vir_bytes foffset, u16_t clearend, + int protflags) +{ + struct vfs_exec_info *vi = (struct vfs_exec_info *) execi->opaque; + struct vnode *vp = ((struct vfs_exec_info *) execi->opaque)->vp; + int r; + u16_t flags = 0; + + if(protflags & PROT_WRITE) + flags |= MVM_WRITABLE; + + r = minix_vfs_mmap(execi->proc_e, foffset, len, + vp->v_dev, vp->v_inode_nr, vi->vmfd, vaddr, clearend, flags); + if(r == OK) { + vi->vmfd_used = 1; + } + + return r; +} + /*===========================================================================* * pm_exec * *===========================================================================*/ @@ -205,14 +230,21 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, int i; static char fullpath[PATH_MAX], elf_interpreter[PATH_MAX], + firstexec[PATH_MAX], finalexec[PATH_MAX]; struct lookup resolve; + struct fproc *vmfp = &fproc[VM_PROC_NR]; stackhook_t makestack = NULL; + static int n; + n++; + struct filp *newfilp = NULL; lock_exec(); + lock_proc(vmfp, 0); /* unset execi values are 0. */ memset(&execi, 0, sizeof(execi)); + execi.vmfd = -1; /* passed from exec() libc code */ execi.userflags = user_exec_flags; @@ -223,6 +255,7 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, rfp = fp = &fproc[slot]; lookup_init(&resolve, fullpath, PATH_NOFLAGS, &execi.vmp, &execi.vp); + resolve.l_vmnt_lock = VMNT_READ; resolve.l_vnode_lock = VNODE_READ; @@ -244,6 +277,7 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, /* Get the exec file name. 
*/ FAILCHECK(fetch_name(path, path_len, fullpath)); strlcpy(finalexec, fullpath, PATH_MAX); + strlcpy(firstexec, fullpath, PATH_MAX); /* Get_read_vp will return an opened vn in execi. * if necessary it releases the existing vp so we can @@ -264,6 +298,7 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, FAILCHECK(fetch_name(path, path_len, fullpath)); FAILCHECK(patch_stack(execi.vp, mbuf, &frame_len, fullpath)); strlcpy(finalexec, fullpath, PATH_MAX); + strlcpy(firstexec, fullpath, PATH_MAX); Get_read_vp(execi, fullpath, 1, 0, &resolve, fp); } @@ -299,9 +334,31 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, * be looked up */ strlcpy(fullpath, elf_interpreter, PATH_MAX); + strlcpy(firstexec, elf_interpreter, PATH_MAX); Get_read_vp(execi, fullpath, 0, 0, &resolve, fp); } + /* We also want an FD for VM to mmap() the process in if possible. */ + { + struct vnode *vp = execi.vp; + assert(vp); + if(vp->v_vmnt->m_haspeek && major(vp->v_dev) != MEMORY_MAJOR) { + int newfd = -1; + if(get_fd(vmfp, 0, R_BIT, &newfd, &newfilp) == OK) { + assert(newfd >= 0 && newfd < OPEN_MAX); + assert(!vmfp->fp_filp[newfd]); + newfilp->filp_count = 1; + newfilp->filp_vno = vp; + newfilp->filp_flags = O_RDONLY; + FD_SET(newfd, &vmfp->fp_filp_inuse); + vmfp->fp_filp[newfd] = newfilp; + /* dup_vnode(vp); */ + execi.vmfd = newfd; + execi.args.memmap = vfs_memmap; + } + } + } + /* callback functions and data */ execi.args.copymem = read_seg; execi.args.clearproc = libexec_clearproc_vm_procctl; @@ -354,11 +411,21 @@ int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, strlcpy(rfp->fp_name, execi.args.progname, PROC_NAME_LEN); pm_execfinal: - if (execi.vp != NULL) { + if(newfilp) unlock_filp(newfilp); + else if (execi.vp != NULL) { unlock_vnode(execi.vp); put_vnode(execi.vp); } + + if(execi.vmfd >= 0 && !execi.vmfd_used) { + if(OK != close_fd(vmfp, execi.vmfd)) { + printf("VFS: unexpected close fail of vm fd\n"); + } + } + + unlock_proc(vmfp); 
unlock_exec(); + return(r); } diff --git a/servers/vfs/filedes.c b/servers/vfs/filedes.c index f0c3de8a9..c0328cdbc 100644 --- a/servers/vfs/filedes.c +++ b/servers/vfs/filedes.c @@ -147,7 +147,7 @@ void init_filps(void) /*===========================================================================* * get_fd * *===========================================================================*/ -int get_fd(int start, mode_t bits, int *k, struct filp **fpt) +int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, struct filp **fpt) { /* Look for a free file descriptor and a free filp slot. Fill in the mode word * in the latter, but don't claim either one yet, since the open() or creat() @@ -159,7 +159,7 @@ int get_fd(int start, mode_t bits, int *k, struct filp **fpt) /* Search the fproc fp_filp table for a free file descriptor. */ for (i = start; i < OPEN_MAX; i++) { - if (fp->fp_filp[i] == NULL && !FD_ISSET(i, &fp->fp_filp_inuse)) { + if (rfp->fp_filp[i] == NULL && !FD_ISSET(i, &rfp->fp_filp_inuse)) { /* A file descriptor has been located. */ *k = i; break; diff --git a/servers/vfs/misc.c b/servers/vfs/misc.c index 44d9d53b7..9dddfd5fe 100644 --- a/servers/vfs/misc.c +++ b/servers/vfs/misc.c @@ -131,7 +131,7 @@ int do_fcntl(message *UNUSED(m_out)) case F_DUPFD: /* This replaces the old dup() system call. 
*/ if (fcntl_argx < 0 || fcntl_argx >= OPEN_MAX) r = EINVAL; - else if ((r = get_fd(fcntl_argx, 0, &new_fd, NULL)) == OK) { + else if ((r = get_fd(fp, fcntl_argx, 0, &new_fd, NULL)) == OK) { f->filp_count++; fp->fp_filp[new_fd] = f; FD_SET(new_fd, &fp->fp_filp_inuse); @@ -311,6 +311,166 @@ int do_fsync(message *UNUSED(m_out)) return(r); } +int dupvm(struct fproc *rfp, int pfd, int *vmfd, struct filp **newfilp) +{ + int result, procfd; + struct filp *f = NULL; + struct fproc *vmf = &fproc[VM_PROC_NR]; + + *newfilp = NULL; + + if ((f = get_filp2(rfp, pfd, VNODE_READ)) == NULL) { + printf("VFS dupvm: get_filp2 failed\n"); + return EBADF; + } + + if(!f->filp_vno->v_vmnt->m_haspeek) { + unlock_filp(f); + printf("VFS dupvm: no peek available\n"); + return EINVAL; + } + + assert(f->filp_vno); + assert(f->filp_vno->v_vmnt); + + if (!S_ISREG(f->filp_vno->v_mode) && !S_ISBLK(f->filp_vno->v_mode)) { + printf("VFS: mmap regular/blockdev only; dev 0x%x ino %d has mode 0%o\n", + f->filp_vno->v_dev, f->filp_vno->v_inode_nr, f->filp_vno->v_mode); + unlock_filp(f); + return EINVAL; + } + + /* get free FD in VM */ + if((result=get_fd(vmf, 0, 0, &procfd, NULL)) != OK) { + unlock_filp(f); + printf("VFS dupvm: getfd failed\n"); + return result; + } + + *vmfd = procfd; + + f->filp_count++; + assert(f->filp_count > 0); + vmf->fp_filp[procfd] = f; + + /* mmap FD's are inuse */ + FD_SET(procfd, &vmf->fp_filp_inuse); + + *newfilp = f; + + return OK; +} + +/*===========================================================================* + * do_vm_call * + *===========================================================================*/ +int do_vm_call(message *m_out) +{ +/* A call that VM does to VFS. + * We must reply with the fixed type VM_VFS_REPLY (and put our result info + * in the rest of the message) so VM can tell the difference between a + * request from VFS and a reply to this call. 
+ */ + int req = job_m_in.VFS_VMCALL_REQ; + int req_fd = job_m_in.VFS_VMCALL_FD; + u32_t req_id = job_m_in.VFS_VMCALL_REQID; + endpoint_t ep = job_m_in.VFS_VMCALL_ENDPOINT; + u64_t offset = make64(job_m_in.VFS_VMCALL_OFFSET_LO, + job_m_in.VFS_VMCALL_OFFSET_HI); + u32_t length = job_m_in.VFS_VMCALL_LENGTH; + int result = OK; + int slot; + struct fproc *rfp, *vmf; + struct filp *f = NULL; + + if(job_m_in.m_source != VM_PROC_NR) + return ENOSYS; + + if(isokendpt(ep, &slot) != OK) rfp = NULL; + else rfp = &fproc[slot]; + + vmf = &fproc[VM_PROC_NR]; + assert(fp == vmf); + assert(rfp != vmf); + + switch(req) { + case VMVFSREQ_FDLOOKUP: + { + int procfd; + + /* Lookup fd in referenced process. */ + + if(!rfp) { + printf("VFS: why isn't ep %d here?!\n", ep); + result = ESRCH; + goto reqdone; + } + + if((result = dupvm(rfp, req_fd, &procfd, &f)) != OK) { + printf("vfs: dupvm failed\n"); + goto reqdone; + } + + if(S_ISBLK(f->filp_vno->v_mode)) { + assert(f->filp_vno->v_sdev != NO_DEV); + m_out->VMV_DEV = f->filp_vno->v_sdev; + m_out->VMV_INO = VMC_NO_INODE; + m_out->VMV_SIZE_PAGES = LONG_MAX; + } else { + m_out->VMV_DEV = f->filp_vno->v_dev; + m_out->VMV_INO = f->filp_vno->v_inode_nr; + m_out->VMV_SIZE_PAGES = + roundup(f->filp_vno->v_size, + PAGE_SIZE)/PAGE_SIZE; + } + + m_out->VMV_FD = procfd; + + result = OK; + + break; + } + case VMVFSREQ_FDCLOSE: + { + result = close_fd(fp, req_fd); + if(result != OK) { + printf("VFS: VM fd close for fd %d, %d (%d)\n", + req_fd, fp->fp_endpoint, result); + } + break; + } + case VMVFSREQ_FDIO: + { + message dummy_out; + + result = actual_llseek(fp, &dummy_out, req_fd, + SEEK_SET, offset); + + if(result == OK) { + result = actual_read_write_peek(fp, PEEKING, + req_fd, NULL, length); + } + + break; + } + default: + panic("VFS: bad request code from VM\n"); + break; + } + +reqdone: + if(f) + unlock_filp(f); + + /* fp is VM still. 
*/ + assert(fp == vmf); + m_out->VMV_ENDPOINT = ep; + m_out->VMV_RESULT = result; + m_out->VMV_REQID = req_id; + + return VM_VFS_REPLY; +} + /*===========================================================================* * pm_reboot * *===========================================================================*/ @@ -709,7 +869,7 @@ int pm_dumpcore(endpoint_t proc_e, int csig, vir_bytes exe_name) if ((f = get_filp(core_fd, VNODE_WRITE)) == NULL) { r=EBADF; goto core_exit; } write_elf_core_file(f, csig, proc_name); unlock_filp(f); - (void) close_fd(fp, core_fd); /* ignore failure, we're exiting anyway */ + (void) close_fd(fp, core_fd); /* ignore failure, we're exiting anyway */ core_exit: if(csig) @@ -768,3 +928,4 @@ void panic_hook(void) printf("VFS mthread stacktraces:\n"); mthread_stacktraces(); } + diff --git a/servers/vfs/open.c b/servers/vfs/open.c index 36b722660..9e6f2ec67 100644 --- a/servers/vfs/open.c +++ b/servers/vfs/open.c @@ -90,13 +90,16 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode) struct vmnt *vmp; struct dmap *dp; struct lookup resolve; + int start = 0; /* Remap the bottom two bits of oflags. */ bits = (mode_t) mode_map[oflags & O_ACCMODE]; if (!bits) return(EINVAL); /* See if file descriptor and filp slots are available. */ - if ((r = get_fd(0, bits, &(scratch(fp).file.fd_nr), &filp)) != OK) return(r); + if ((r = get_fd(fp, start, bits, &(scratch(fp).file.fd_nr), + &filp)) != OK) + return(r); lookup_init(&resolve, path, PATH_NOFLAGS, &vmp, &vp); @@ -587,21 +590,13 @@ int do_mkdir(message *UNUSED(m_out)) return(r); } -/*===========================================================================* - * do_lseek * - *===========================================================================*/ -int do_lseek(message *m_out) +int actual_lseek(message *m_out, int seekfd, int seekwhence, off_t offset) { /* Perform the lseek(ls_fd, offset, whence) system call. 
*/ register struct filp *rfilp; - int r = OK, seekfd, seekwhence; - off_t offset; + int r = OK; u64_t pos, newpos; - seekfd = job_m_in.ls_fd; - seekwhence = job_m_in.whence; - offset = (off_t) job_m_in.offset_lo; - /* Check to see if the file descriptor is valid. */ if ( (rfilp = get_filp(seekfd, VNODE_READ)) == NULL) return(err_code); @@ -647,23 +642,30 @@ int do_lseek(message *m_out) } /*===========================================================================* - * do_llseek * + * do_lseek * *===========================================================================*/ -int do_llseek(message *m_out) +int do_lseek(message *m_out) +{ + return actual_lseek(m_out, job_m_in.ls_fd, job_m_in.whence, + (off_t) job_m_in.offset_lo); +} + +/*===========================================================================* + * actual_llseek * + *===========================================================================*/ +int actual_llseek(struct fproc *rfp, message *m_out, int seekfd, int seekwhence, + u64_t offset) { /* Perform the llseek(ls_fd, offset, whence) system call. */ register struct filp *rfilp; u64_t pos, newpos; - int r = OK, seekfd, seekwhence; - long off_hi, off_lo; - - seekfd = job_m_in.ls_fd; - seekwhence = job_m_in.whence; - off_hi = job_m_in.offset_high; - off_lo = job_m_in.offset_lo; + int r = OK; + long off_hi = ex64hi(offset); /* Check to see if the file descriptor is valid. */ - if ( (rfilp = get_filp(seekfd, VNODE_READ)) == NULL) return(err_code); + if ( (rfilp = get_filp2(rfp, seekfd, VNODE_READ)) == NULL) { + return(err_code); + } /* No lseek on pipes. */ if (S_ISFIFO(rfilp->filp_vno->v_mode)) { @@ -679,7 +681,7 @@ int do_llseek(message *m_out) default: unlock_filp(rfilp); return(EINVAL); } - newpos = add64(pos, make64(off_lo, off_hi)); + newpos = pos + offset; /* Check for overflow. 
*/ if ((off_hi > 0) && cmp64(newpos, pos) < 0) @@ -704,15 +706,20 @@ int do_llseek(message *m_out) return(r); } +int do_llseek(message *m_out) +{ + return actual_llseek(fp, m_out, job_m_in.ls_fd, job_m_in.whence, + make64(job_m_in.offset_lo, job_m_in.offset_high)); +} + /*===========================================================================* * do_close * *===========================================================================*/ int do_close(message *UNUSED(m_out)) { /* Perform the close(fd) system call. */ - - scratch(fp).file.fd_nr = job_m_in.fd; - return close_fd(fp, scratch(fp).file.fd_nr); + int thefd = job_m_in.fd; + return close_fd(fp, thefd); } @@ -731,10 +738,16 @@ int fd_nr; /* First locate the vnode that belongs to the file descriptor. */ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code); + vp = rfilp->filp_vno; - close_filp(rfilp); + /* first, make all future get_filp2()'s fail; otherwise + * we might try to close the same fd in different threads + */ rfp->fp_filp[fd_nr] = NULL; + + close_filp(rfilp); + FD_CLR(fd_nr, &rfp->fp_cloexec_set); FD_CLR(fd_nr, &rfp->fp_filp_inuse); diff --git a/servers/vfs/pipe.c b/servers/vfs/pipe.c index a6cc0f9e0..7870d7ae1 100644 --- a/servers/vfs/pipe.c +++ b/servers/vfs/pipe.c @@ -101,7 +101,7 @@ static int create_pipe(int fil_des[2], int flags) /* Acquire two file descriptors. 
*/ rfp = fp; - if ((r = get_fd(0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) { + if ((r = get_fd(fp, 0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) { unlock_vnode(vp); unlock_vmnt(vmp); return(r); @@ -109,7 +109,7 @@ static int create_pipe(int fil_des[2], int flags) rfp->fp_filp[fil_des[0]] = fil_ptr0; FD_SET(fil_des[0], &rfp->fp_filp_inuse); fil_ptr0->filp_count = 1; /* mark filp in use */ - if ((r = get_fd(0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) { + if ((r = get_fd(fp, 0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) { rfp->fp_filp[fil_des[0]] = NULL; FD_CLR(fil_des[0], &rfp->fp_filp_inuse); fil_ptr0->filp_count = 0; /* mark filp free */ diff --git a/servers/vfs/proto.h b/servers/vfs/proto.h index a2fe8728b..339949042 100644 --- a/servers/vfs/proto.h +++ b/servers/vfs/proto.h @@ -86,10 +86,10 @@ void check_filp_locks(void); void check_filp_locks_by_me(void); void init_filps(void); struct filp *find_filp(struct vnode *vp, mode_t bits); -int get_fd(int start, mode_t bits, int *k, struct filp **fpt); +int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, + struct filp **fpt); struct filp *get_filp(int fild, tll_access_t locktype); -struct filp *get_filp2(struct fproc *rfp, int fild, tll_access_t - locktype); +struct filp *get_filp2(struct fproc *rfp, int fild, tll_access_t locktype); void lock_filp(struct filp *filp, tll_access_t locktype); void unlock_filp(struct filp *filp); void unlock_filps(struct filp *filp1, struct filp *filp2); @@ -143,8 +143,10 @@ int do_fsync(message *m_out); void pm_reboot(void); int do_svrctl(message *m_out); int do_getsysinfo(void); +int do_vm_call(message *m_out); int pm_dumpcore(endpoint_t proc_e, int sig, vir_bytes exe_name); void * ds_event(void *arg); +int dupvm(struct fproc *fp, int pfd, int *vmfd, struct filp **f); /* mount.c */ int do_fsready(message *m_out); @@ -169,6 +171,9 @@ int do_mknod(message *m_out); int do_mkdir(message *m_out); int do_open(message *m_out); int do_slink(message *m_out); +int actual_lseek(message *m_out, int 
seekfd, int seekwhence, off_t offset); +int actual_llseek(struct fproc *rfp, message *m_out, int seekfd, + int seekwhence, u64_t offset); int do_vm_open(void); int do_vm_close(void); @@ -213,8 +218,10 @@ void lock_bsf(void); void unlock_bsf(void); void check_bsf_lock(void); int do_read_write_peek(int rw_flag, int fd, char *buf, size_t bytes); -int read_write(int rw_flag, struct filp *f, char *buffer, size_t nbytes, - endpoint_t for_e); +int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd, char *buf, + size_t bytes); +int read_write(struct fproc *rfp, int rw_flag, struct filp *f, char *buffer, + size_t nbytes, endpoint_t for_e); int rw_pipe(int rw_flag, endpoint_t usr, struct filp *f, char *buf, size_t req_size); diff --git a/servers/vfs/read.c b/servers/vfs/read.c index b278738f0..57bf38615 100644 --- a/servers/vfs/read.c +++ b/servers/vfs/read.c @@ -83,9 +83,10 @@ void check_bsf_lock(void) } /*===========================================================================* - * do_read_write_peek * + * actual_read_write_peek * *===========================================================================*/ -int do_read_write_peek(int rw_flag, int io_fd, char *io_buf, size_t io_nbytes) +int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd, + char *io_buf, size_t io_nbytes) { /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */ struct filp *f; @@ -95,37 +96,48 @@ int do_read_write_peek(int rw_flag, int io_fd, char *io_buf, size_t io_nbytes) if(rw_flag == WRITING) ro = 0; - scratch(fp).file.fd_nr = io_fd; - scratch(fp).io.io_buffer = io_buf; - scratch(fp).io.io_nbytes = io_nbytes; + scratch(rfp).file.fd_nr = io_fd; + scratch(rfp).io.io_buffer = io_buf; + scratch(rfp).io.io_nbytes = io_nbytes; - locktype = ro ? VNODE_READ : VNODE_WRITE; - if ((f = get_filp(scratch(fp).file.fd_nr, locktype)) == NULL) + locktype = rw_flag == WRITING ? 
VNODE_WRITE : VNODE_READ; + if ((f = get_filp2(rfp, scratch(rfp).file.fd_nr, locktype)) == NULL) return(err_code); + + assert(f->filp_count > 0); + if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) { unlock_filp(f); return(f->filp_mode == FILP_CLOSED ? EIO : EBADF); } - if (scratch(fp).io.io_nbytes == 0) { + if (scratch(rfp).io.io_nbytes == 0) { unlock_filp(f); return(0); /* so char special files need not check for 0*/ } - r = read_write(rw_flag, f, scratch(fp).io.io_buffer, scratch(fp).io.io_nbytes, - who_e); + r = read_write(rfp, rw_flag, f, scratch(rfp).io.io_buffer, + scratch(rfp).io.io_nbytes, who_e); unlock_filp(f); return(r); } +/*===========================================================================* + * do_read_write_peek * + *===========================================================================*/ +int do_read_write_peek(int rw_flag, int io_fd, char *io_buf, size_t io_nbytes) +{ + return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes); +} + /*===========================================================================* * read_write * *===========================================================================*/ -int read_write(int rw_flag, struct filp *f, char *buf, size_t size, - endpoint_t for_e) +int read_write(struct fproc *rfp, int rw_flag, struct filp *f, + char *buf, size_t size, endpoint_t for_e) { register struct vnode *vp; - u64_t position, res_pos, new_pos; + u64_t position, res_pos; unsigned int cum_io, cum_io_incr, res_cum_io; int op, r; @@ -141,17 +153,23 @@ int read_write(int rw_flag, struct filp *f, char *buf, size_t size, op = (rw_flag == READING ? 
VFS_DEV_READ : VFS_DEV_WRITE); if (S_ISFIFO(vp->v_mode)) { /* Pipes */ - if (fp->fp_cum_io_partial != 0) { + if (rfp->fp_cum_io_partial != 0) { panic("VFS: read_write: fp_cum_io_partial not clear"); } - if(rw_flag == PEEKING) return EINVAL; + if(rw_flag == PEEKING) { + printf("read_write: peek on pipe makes no sense\n"); + return EINVAL; + } r = rw_pipe(rw_flag, for_e, f, buf, size); } else if (S_ISCHR(vp->v_mode)) { /* Character special files. */ dev_t dev; int suspend_reopen; int op = (rw_flag == READING ? VFS_DEV_READ : VFS_DEV_WRITE); - if(rw_flag == PEEKING) return EINVAL; + if(rw_flag == PEEKING) { + printf("read_write: peek on char device makes no sense\n"); + return EINVAL; + } if (vp->v_sdev == NO_DEV) panic("VFS: read_write tries to access char dev NO_DEV"); @@ -170,15 +188,17 @@ int read_write(int rw_flag, struct filp *f, char *buf, size_t size, if (vp->v_sdev == NO_DEV) panic("VFS: read_write tries to access block dev NO_DEV"); - if(rw_flag == PEEKING) return EINVAL; - lock_bsf(); - r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position, size, - buf, rw_flag, &res_pos, &res_cum_io); - if (r == OK) { - position = res_pos; - cum_io += res_cum_io; + if(rw_flag == PEEKING) { + r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size); + } else { + r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, + position, size, buf, rw_flag, &res_pos, &res_cum_io); + if (r == OK) { + position = res_pos; + cum_io += res_cum_io; + } } unlock_bsf(); @@ -189,16 +209,21 @@ int read_write(int rw_flag, struct filp *f, char *buf, size_t size, } /* Issue request */ - r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, rw_flag, for_e, - buf, size, &new_pos, &cum_io_incr); + if(rw_flag == PEEKING) { + r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size); + } else { + u64_t new_pos; + r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position, + rw_flag, for_e, buf, size, &new_pos, &cum_io_incr); - if (r >= 0) { - if (ex64hi(new_pos)) - panic("read_write: bad new pos"); + if 
(r >= 0) { + if (ex64hi(new_pos)) + panic("read_write: bad new pos"); - position = new_pos; - cum_io += cum_io_incr; - } + position = new_pos; + cum_io += cum_io_incr; + } + } } /* On write, update file size and access time. */ @@ -220,7 +245,7 @@ int read_write(int rw_flag, struct filp *f, char *buf, size_t size, * generate s SIGPIPE signal. */ if (!(f->filp_flags & O_NOSIGPIPE)) { - sys_kill(fp->fp_endpoint, SIGPIPE); + sys_kill(rfp->fp_endpoint, SIGPIPE); } } @@ -292,6 +317,8 @@ size_t req_size; vp = f->filp_vno; position = cvu64(0); /* Not actually used */ + assert(rw_flag == READING || rw_flag == WRITING); + /* fp->fp_cum_io_partial is only nonzero when doing partial writes */ cum_io = fp->fp_cum_io_partial; diff --git a/servers/vfs/table.c b/servers/vfs/table.c index 5fe5b817e..6aa4f734b 100644 --- a/servers/vfs/table.c +++ b/servers/vfs/table.c @@ -132,6 +132,7 @@ int (*call_vec[])(message *m_out) = { no_sys, /* 114 = (clock_getres) */ no_sys, /* 115 = (clock_gettime) */ no_sys, /* 116 = (clock_settime) */ + do_vm_call, /* 117 = call from vm */ }; /* This should not fail with "array size is negative": */ extern int dummy[sizeof(call_vec) == NCALLS * sizeof(call_vec[0]) ? 
1 : -1]; diff --git a/servers/vfs/utility.c b/servers/vfs/utility.c index db2f831d6..687b53d95 100644 --- a/servers/vfs/utility.c +++ b/servers/vfs/utility.c @@ -122,8 +122,6 @@ int isokendpt_f(char *file, int line, endpoint_t endpoint, int *proc, failed = 1; } else if ((ke = fproc[*proc].fp_endpoint) != endpoint) { if(ke == NONE) { - printf("VFS %s:%d: endpoint (%d) points to NONE slot (%d)\n", - file, line, endpoint, *proc); assert(fproc[*proc].fp_pid == PID_FREE); } else { printf("VFS %s:%d: proc (%d) from endpoint (%d) doesn't match " diff --git a/sys/sys/mman.h b/sys/sys/mman.h index 2c37e14c4..c4d1f838a 100644 --- a/sys/sys/mman.h +++ b/sys/sys/mman.h @@ -92,6 +92,7 @@ typedef __off_t off_t; /* file offset */ #define MAP_FIXED 0x0200 /* require mapping to happen at hint */ #define MAP_THIRDPARTY 0x0400 /* perform on behalf of any process */ #define MAP_UNINITIALIZED 0x0800 /* do not clear memory */ +#define MAP_FILE 0x1000 /* it's a file */ /* * Error indicator returned by mmap(2)