vm: mmap support

. test74 for mmap functionality
	. vm: add a mem_file memory type that specifies an mmap()ped
	  memory range, backed by a file
	. add fdref, an object that keeps track of FD references within
	  VM per process and so knows how to de-duplicate the use of FDs
	  by various mmap()ped ranges; there can be many more mapped ranges
	  than there can be FDs
	. turned off for now, enable with 'filemap=1' as boot option

Change-Id: I640b1126cdaa522a0560301cf6732b7661555672
This commit is contained in:
Ben Gras 2013-05-07 12:36:09 +00:00
parent f369157d95
commit 49b9165251
22 changed files with 953 additions and 59 deletions

View file

@ -4643,6 +4643,7 @@
./usr/tests/minix-posix/test71 minix-sys ./usr/tests/minix-posix/test71 minix-sys
./usr/tests/minix-posix/test72 minix-sys ./usr/tests/minix-posix/test72 minix-sys
./usr/tests/minix-posix/test73 minix-sys ./usr/tests/minix-posix/test73 minix-sys
./usr/tests/minix-posix/test74 minix-sys
./usr/tests/minix-posix/test7 minix-sys ./usr/tests/minix-posix/test7 minix-sys
./usr/tests/minix-posix/test8 minix-sys ./usr/tests/minix-posix/test8 minix-sys
./usr/tests/minix-posix/test9 minix-sys ./usr/tests/minix-posix/test9 minix-sys

View file

@ -4,5 +4,6 @@ default=2
menu=Start MINIX 3:load_mods /boot/minix_default/mod*;multiboot /boot/minix_default/kernel rootdevname=$rootdevname $args menu=Start MINIX 3:load_mods /boot/minix_default/mod*;multiboot /boot/minix_default/kernel rootdevname=$rootdevname $args
menu=Start latest MINIX 3:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname $args menu=Start latest MINIX 3:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname $args
menu=Start latest MINIX 3 in single user mode:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname bootopts=-s $args menu=Start latest MINIX 3 in single user mode:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname bootopts=-s $args
menu=Start latest MINIX 3 with file mmap:load_mods /boot/minix_latest/mod*;multiboot /boot/minix_latest/kernel rootdevname=$rootdevname filemap=1 $args
menu=Edit menu option:edit menu=Edit menu option:edit
menu=Drop to boot prompt:prompt menu=Drop to boot prompt:prompt

View file

@ -6,7 +6,7 @@ SRCS= main.c alloc.c utility.c exit.c fork.c break.c \
mmap.c slaballoc.c region.c pagefaults.c \ mmap.c slaballoc.c region.c pagefaults.c \
rs.c queryexit.c pb.c regionavl.c \ rs.c queryexit.c pb.c regionavl.c \
mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c \ mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c \
mem_cache.c cache.c mem_cache.c cache.c vfs.c mem_file.c fdref.c
.if ${MACHINE_ARCH} == "earm" .if ${MACHINE_ARCH} == "earm"
LDFLAGS+= -T ${.CURDIR}/arch/${MACHINE_ARCH}/vm.lds LDFLAGS+= -T ${.CURDIR}/arch/${MACHINE_ARCH}/vm.lds

View file

@ -1330,10 +1330,6 @@ int pt_bind(pt_t *pt, struct vmproc *who)
pdeslot * ARCH_PAGEDIR_SIZE); pdeslot * ARCH_PAGEDIR_SIZE);
#endif #endif
#if 0
printf("VM: slot %d endpoint %d has pde val 0x%lx at kernel address 0x%lx\n",
slot, who->vm_endpoint, page_directories[slot], pdes);
#endif
/* Tell kernel about new page table root. */ /* Tell kernel about new page table root. */
return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys, pdes); return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys, pdes);
} }

177
servers/vm/fdref.c Normal file
View file

@ -0,0 +1,177 @@
/* File that implements the 'fdref' data structure. It keeps track
* of how many times a particular fd (per process) is referenced by
* mmapped objects.
*
* This is used to
* - have many references to the same file, without needing an FD each
* - deciding when we have to close an FD (last reference disappears)
*
* Examples:
* - if a file-mmapped region is split, the refcount increases; there are
* now two regions referencing the same FD. We can't simply close the
* FD once either region is unmapped, as the pagefaults for the other
* would stop working. So we increase the refcount to that fd.
* - if a new file-mapped region is requested, we might find out it's the
* same dev/inode the same process already has referenced. we could
* decide to close the new reference and use an existing one, so
* references to the same file aren't fd-limited.
* - if a file-mapped region is copied, we have to create a new
* fdref object, as the source process might disappear; we have to
* use the new process' fd for it.
*/
#include <assert.h>
#include <string.h>
#include <minix/hash.h>
#include "proto.h"
#include "vm.h"
#include "fdref.h"
#include "vmproc.h"
#include "glo.h"
static struct fdref *fdrefs;
void fdref_sanitycheck(void)
{
struct vmproc *vmp;
region_iter v_iter;
struct fdref *fr;
static int prevopen = 0;
int openfd = 0;
for(fr = fdrefs; fr; fr = fr->next) {
struct fdref *fr2;
for(fr2 = fdrefs; fr2; fr2 = fr2->next) {
if(fr == fr2) continue;
if(fr->fd == fr2->fd) {
printf("equal fd omg\n");
util_stacktrace();
}
if(fr->ino == fr2->ino && fr->dev == fr2->dev) {
printf("equal metadata omg\n");
util_stacktrace();
}
}
openfd++;
}
for(fr = fdrefs; fr; fr = fr->next) {
fr->counting = 0;
}
for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
struct vir_region *vr;
if(!(vmp->vm_flags & VMF_INUSE))
continue;
region_start_iter_least(&vmp->vm_regions_avl, &v_iter);
while((vr = region_get_iter(&v_iter))) {
if(vr->def_memtype == &mem_type_mappedfile && vr->param.file.inited) {
vr->param.file.fdref->counting++;
}
region_incr_iter(&v_iter);
}
}
for(fr = fdrefs; fr; fr = fr->next) {
if(fr->counting != fr->refcount) {
printf("counting %d != refcount %d\n",
fr->counting, fr->refcount);
util_stacktrace();
}
}
if(prevopen != openfd && openfd > 100) {
printf("%d open\n", openfd);
prevopen = openfd;
}
}
/* Allocate a new fdref for (dev, ino) reachable through 'fd' and push
 * it on the front of the global fdref list.
 *
 * The new object starts with a refcount of 0; callers take references
 * with fdref_ref(). 'owner' is accepted but not used here.
 *
 * Returns the new object, or NULL if the slab allocation fails.
 */
struct fdref *fdref_new(struct vmproc *owner, ino_t ino, dev_t dev, int fd)
{
	struct fdref *node;

	if(!SLABALLOC(node))
		return NULL;

	/* Identity of the file and the descriptor used to reach it. */
	node->ino = ino;
	node->dev = dev;
	node->fd = fd;
	node->refcount = 0;

	/* Link in at the head of the list. */
	node->next = fdrefs;
	fdrefs = node;

	return node;
}
/* Make 'region' refer to 'ref' and account for the new reference. */
void fdref_ref(struct fdref *ref, struct vir_region *region)
{
	assert(ref);

	ref->refcount += 1;
	region->param.file.fdref = ref;
}
/* Drop the reference that 'region' holds on its fdref.
 *
 * The region's fdref pointer is cleared and the refcount decremented.
 * When the last reference disappears, the fdref is unlinked from the
 * global list and freed, and an asynchronous close of its fd is
 * requested from VFS on behalf of region->parent.
 *
 * Panics if the fdref to be freed is not on the list, or if the close
 * request cannot be queued.
 */
void fdref_deref(struct vir_region *region)
{
	struct fdref *ref = region->param.file.fdref;
	struct fdref **link;
	int fd;

	assert(ref);
	assert(ref->refcount > 0);

	/* Remember the fd; 'ref' may be freed below. */
	fd = ref->fd;
	region->param.file.fdref = NULL;
	ref->refcount--;

	if(ref->refcount > 0) return;

	/* Find the link that points at 'ref'. The walk stops at the end
	 * of the list, so a ref that is (unexpectedly) not on the list
	 * is diagnosed instead of running off the end through NULL.
	 */
	for(link = &fdrefs; *link != NULL && *link != ref;
		link = &(*link)->next)
		;

	if(*link != ref)
		panic("fdref_deref: fdref not on list");

	*link = ref->next;

	SLABFREE(ref);
	ref = NULL;

	/* The last reference has disappeared and the ref object is
	 * freed; now asynchronously close the fd in VFS.
	 *
	 * We don't need a callback as a close failing, although
	 * unexpected, isn't a problem and can't be handled. VFS
	 * will print a diagnostic.
	 */
	if(vfs_request(VMVFSREQ_FDCLOSE, fd, region->parent,
		0, 0, NULL, NULL, NULL, 0) != OK) {
		panic("fdref_deref: could not send close request");
	}
}
/* Return an fdref for (dev, ino), reusing an existing one if possible.
 *
 * The list is searched for an entry with the same dev and ino:
 *  - if it also has the same fd, it is returned as is;
 *  - otherwise, if 'mayclose' is set, a close of the (redundant) new
 *    'fd' is requested from VFS for 'owner' and the existing entry is
 *    returned; a failure to queue the close is only reported;
 *  - otherwise the search continues.
 * If nothing suitable is found, a fresh fdref is created with
 * fdref_new(), whose result (possibly NULL) is returned.
 */
struct fdref *fdref_dedup_or_new(struct vmproc *owner,
	ino_t ino, dev_t dev, int fd, int mayclose)
{
	struct fdref *cur;

	for(cur = fdrefs; cur != NULL; cur = cur->next) {
		/* Only entries for the very same file are candidates. */
		if(cur->ino != ino || cur->dev != dev)
			continue;

		if(cur->fd == fd)
			return cur;

		if(mayclose) {
			/* The caller's fd duplicates one we already hold. */
			if(vfs_request(VMVFSREQ_FDCLOSE, fd, owner,
				0, 0, NULL, NULL, NULL, 0) != OK) {
				printf("fdref_dedup_or_new: could not close\n");
			}
			return cur;
		}
	}

	return fdref_new(owner, ino, dev, fd);
}

29
servers/vm/fdref.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef _FDREF_H
#define _FDREF_H 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/keymap.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/const.h>
/* One file descriptor that file-mapped regions refer to.
 *
 * Objects live on a singly linked list kept in fdref.c. The refcount
 * is raised by fdref_ref() and lowered by fdref_deref(); (dev, ino)
 * identify the file and are what fdref_dedup_or_new() matches on.
 *
 * Only the type is declared here: a header must not define an object,
 * or every file including it gets its own definition of that object.
 */
struct fdref {
	int fd;			/* descriptor used for requests to VFS */
	int refcount;		/* number of regions referring to this fd */
	dev_t dev;		/* device of the mapped file */
	ino_t ino;		/* inode of the mapped file */
	struct fdref *next;	/* next object on the list */
	int counting;		/* sanity check */
};
#endif

View file

@ -18,6 +18,8 @@
EXTERN struct vmproc vmproc[VMP_NR]; EXTERN struct vmproc vmproc[VMP_NR];
long enable_filemap;
EXTERN kinfo_t kernel_boot_info; EXTERN kinfo_t kernel_boot_info;
#if SANITYCHECKS #if SANITYCHECKS

View file

@ -323,6 +323,9 @@ void init_vm(void)
panic("couldn't get bootinfo: %d", s); panic("couldn't get bootinfo: %d", s);
} }
/* Turn file mmap on? */
env_parse("filemap", "d", 0, &enable_filemap, 0, 1);
/* Sanity check */ /* Sanity check */
assert(kernel_boot_info.mmap_size > 0); assert(kernel_boot_info.mmap_size > 0);
assert(kernel_boot_info.mods_with_kernel > 0); assert(kernel_boot_info.mods_with_kernel > 0);
@ -414,6 +417,10 @@ void init_vm(void)
CALLMAP(VM_WILLEXIT, do_willexit); CALLMAP(VM_WILLEXIT, do_willexit);
CALLMAP(VM_NOTIFY_SIG, do_notify_sig); CALLMAP(VM_NOTIFY_SIG, do_notify_sig);
/* Calls from VFS. */
CALLMAP(VM_VFS_REPLY, do_vfs_reply);
CALLMAP(VM_VFS_MMAP, do_vfs_mmap);
/* Calls from RS */ /* Calls from RS */
CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv); CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
CALLMAP(VM_RS_UPDATE, do_rs_update); CALLMAP(VM_RS_UPDATE, do_rs_update);

View file

@ -118,7 +118,6 @@ do_mapcache(message *msg)
printf("VM: map_pf failed\n"); printf("VM: map_pf failed\n");
return ENOMEM; return ENOMEM;
} }
assert(!vr->param.pb_cache); assert(!vr->param.pb_cache);
} }

251
servers/vm/mem_file.c Normal file
View file

@ -0,0 +1,251 @@
/* This file implements the methods of memory-mapped files. */
#include <assert.h>
#include "proto.h"
#include "vm.h"
#include "region.h"
#include "glo.h"
#include "cache.h"
/* These functions are static so as to not pollute the
* global namespace, and are accessed through their function
* pointers.
*
* Each prototype below is the handler that mem_type_mappedfile
* installs for the hook named in the initializer further down.
*/
static void mappedfile_split(struct vmproc *vmp, struct vir_region *vr,
struct vir_region *r1, struct vir_region *r2);
static int mappedfile_unreference(struct phys_region *pr);
static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region,
struct phys_region *ph, int write, vfs_callback_t callback, void *, int);
static int mappedfile_sanitycheck(struct phys_region *pr, char *file, int line);
static int mappedfile_writable(struct phys_region *pr);
static int mappedfile_copy(struct vir_region *vr, struct vir_region *newvr);
static int mappedfile_lowshrink(struct vir_region *vr, vir_bytes len);
static void mappedfile_delete(struct vir_region *region);
/* The memory type for file-backed mmap()ped ranges: a table binding
* each region/page event to its handler in this file.
*/
struct mem_type mem_type_mappedfile = {
.name = "file-mapped memory",
.ev_unreference = mappedfile_unreference,
.ev_pagefault = mappedfile_pagefault,
.ev_sanitycheck = mappedfile_sanitycheck,
.ev_copy = mappedfile_copy,
.writable = mappedfile_writable,
.ev_split = mappedfile_split,
.ev_lowshrink = mappedfile_lowshrink,
.ev_delete = mappedfile_delete,
};
/* Called for a physical region whose block has no references left:
 * give the backing page, if there is one, back to the allocator.
 * Always returns OK.
 */
static int mappedfile_unreference(struct phys_region *pr)
{
	assert(pr->ph->refcount == 0);

	/* No page was ever attached to this block; nothing to free. */
	if(pr->ph->phys == MAP_NONE)
		return OK;

	free_mem(ABS2CLICK(pr->ph->phys), 1);
	return OK;
}
/* Give 'ph' a private copy of its page via mem_cow() and retype it
 * as anonymous memory.
 *
 * If 'clearend' is nonzero, the last 'clearend' bytes of the page
 * are zeroed afterwards; it must be smaller than a page.
 *
 * Returns OK, or the error from mem_cow(). Panics if zeroing fails.
 */
static int cow_block(struct vmproc *vmp, struct vir_region *region,
	struct phys_region *ph, u16_t clearend)
{
	phys_bytes zero_from;
	int status;

	status = mem_cow(region, ph, MAP_NONE, MAP_NONE);
	if(status != OK) {
		printf("mappedfile_pagefault: COW failed\n");
		return status;
	}

	/* After COW we are a normal piece of anonymous memory. */
	ph->memtype = &mem_type_anon;

	/* Nothing to wipe at the end of the page? Then we're done. */
	if(!clearend)
		return OK;

	assert(clearend < VM_PAGE_SIZE);

	/* Physical address of the first byte of the tail to clear. */
	zero_from = ph->ph->phys;
	zero_from += VM_PAGE_SIZE - clearend;

	if(sys_memset(NONE, 0, zero_from, clearend) != OK) {
		panic("cow_block: clearend failed\n");
	}

	return OK;
}
/* Handle a page fault on 'ph' within file-mapped 'region'.
 *
 * If the page has no memory yet, it is looked up in the cache by
 * (dev, offset) or (dev, ino, offset). On a hit the cached page is
 * linked in; it is then privately copied if this is the page holding
 * the region's 'clearend' tail, or if the fault is a write. On a miss
 * the page is requested from VFS and the fault is suspended; 'cb' is
 * invoked with 'state' once VFS has replied.
 *
 * If the page is already present, only a write fault is expected and
 * it is resolved by a private copy.
 *
 * Returns OK, SUSPEND (request queued), EFAULT (no callback given or
 * unexpected non-write fault), ENOMEM (request could not be queued),
 * or the error from the copy.
 */
static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region,
	struct phys_region *ph, int write, vfs_callback_t cb,
	void *state, int statelen)
{
	u32_t allocflags;
	int procfd;

	allocflags = vrallocflags(region->flags);

	assert(ph->ph->refcount > 0);
	assert(region->param.file.inited);
	assert(region->param.file.fdref);
	assert(region->param.file.fdref->dev != NO_DEV);

	/* Only look through the fdref once it has been checked above. */
	procfd = region->param.file.fdref->fd;

	/* Totally new block? Create it. */
	if(ph->ph->phys == MAP_NONE) {
		struct cached_page *cp;
		u64_t referenced_offset =
			region->param.file.offset + ph->offset;

		if(region->param.file.fdref->ino == VMC_NO_INODE) {
			cp = find_cached_page_bydev(region->param.file.fdref->dev,
				referenced_offset, VMC_NO_INODE, 0, 1);
		} else {
			cp = find_cached_page_byino(region->param.file.fdref->dev,
				region->param.file.fdref->ino, referenced_offset, 1);
		}

		if(cp) {
			int result = OK;

			/* Swap the empty block for the cached page. */
			pb_unreferenced(region, ph, 0);
			pb_link(ph, cp->page, ph->offset, region);

			if(roundup(ph->offset+region->param.file.clearend,
				VM_PAGE_SIZE) >= region->length) {
				/* Page with the tail that must read as zero:
				 * it needs a private, partly cleared copy.
				 */
				result = cow_block(vmp, region, ph,
					region->param.file.clearend);
			} else if(write) {
				result = cow_block(vmp, region, ph, 0);
			}

			return result;
		}

		/* Not cached; we can only go on by asking VFS, and for
		 * that the caller has to be able to be resumed later.
		 */
		if(!cb) {
			printf("VM: mem_file: no callback, returning EFAULT\n");
			sys_sysctl_stacktrace(vmp->vm_endpoint);
			return EFAULT;
		}

		if(vfs_request(VMVFSREQ_FDIO, procfd, vmp, referenced_offset,
			VM_PAGE_SIZE, cb, NULL, state, statelen) != OK) {
			printf("VM: mappedfile_pagefault: vfs_request failed\n");
			return ENOMEM;
		}

		return SUSPEND;
	}

	if(!write) {
		printf("mappedfile_pagefault: nonwrite fault?\n");
		return EFAULT;
	}

	return cow_block(vmp, region, ph, 0);
}
/* Sanity-check hook for one physical region of a file mapping:
* registers the region's page with usedpages_add() and asserts,
* through MYASSERT, that this succeeds. Always returns OK.
* NOTE(review): 'file' and 'line' are not referenced directly here;
* presumably MYASSERT picks them up -- confirm against its definition.
*/
static int mappedfile_sanitycheck(struct phys_region *pr, char *file, int line)
{
MYASSERT(usedpages_add(pr->ph->phys, VM_PAGE_SIZE) == OK);
return OK;
}
/* Writability query for a page of a file mapping. The answer does
 * not depend on the page: it is always 0 (not writable).
 */
static int mappedfile_writable(struct phys_region *pr)
{
	(void) pr;	/* the verdict is the same for every page */

	return 0;
}
/* Copy hook: set up 'newvr' as a file mapping of the same file,
 * offset and clearend as 'vr'.
 *
 * The file is attached through mappedfile_setfile() for newvr's
 * parent, without prefilling pages and without permission to close
 * the fd.
 *
 * Returns OK, or the error reported by mappedfile_setfile().
 */
static int mappedfile_copy(struct vir_region *vr, struct vir_region *newvr)
{
	int r;

	assert(vr->param.file.inited);

	r = mappedfile_setfile(newvr->parent, newvr, vr->param.file.fdref->fd,
		vr->param.file.offset,
		vr->param.file.fdref->dev, vr->param.file.fdref->ino,
		vr->param.file.clearend, 0, 0);

	/* Don't report a copy as successful if the file wasn't attached. */
	if(r != OK)
		return r;

	assert(newvr->param.file.inited);

	return OK;
}
/* Attach a file to file-mapped 'region'.
 *
 * An fdref for (dev, ino, fd) is obtained (shared with existing
 * mappings where possible; 'mayclosefd' allows a redundant fd to be
 * closed) and referenced by the region, and the region's file offset
 * and 'clearend' are recorded.
 *
 * With 'prefill' set, pages of the file that are already in the cache
 * are mapped in right away. This is best effort: it stops at the page
 * that holds the 'clearend' tail and at the first failure, and pages
 * that are not cached are skipped.
 *
 * Always returns OK.
 */
int mappedfile_setfile(struct vmproc *owner,
	struct vir_region *region, int fd, u64_t offset,
	dev_t dev, ino_t ino, u16_t clearend, int prefill, int mayclosefd)
{
	struct fdref *ref;
	vir_bytes pos;

	ref = fdref_dedup_or_new(owner, ino, dev, fd, mayclosefd);

	assert(ref);
	assert(!region->param.file.inited);
	assert(dev != NO_DEV);

	fdref_ref(ref, region);
	region->param.file.offset = offset;
	region->param.file.clearend = clearend;
	region->param.file.inited = 1;

	if(prefill) {
		for(pos = 0; pos < region->length; pos += VM_PAGE_SIZE) {
			struct cached_page *cached;
			struct phys_region *pr;
			u64_t file_pos = offset + pos;

			/* The page with the tail to clear is left to the
			 * pagefault handler, and nothing comes after it.
			 */
			if(roundup(pos+region->param.file.clearend,
				VM_PAGE_SIZE) >= region->length) {
				break;
			}

			cached = (ino == VMC_NO_INODE) ?
				find_cached_page_bydev(dev, file_pos,
					VMC_NO_INODE, 0, 1) :
				find_cached_page_byino(dev, ino,
					file_pos, 1);

			/* Not in the cache; it'll be faulted in if needed. */
			if(cached == NULL)
				continue;

			pr = pb_reference(cached->page, pos, region,
				&mem_type_mappedfile);
			if(pr == NULL) {
				printf("mappedfile_setfile: pb_reference failed\n");
				break;
			}

			if(map_ph_writept(region->parent, region, pr) != OK) {
				printf("mappedfile_setfile: map_ph_writept failed\n");
				break;
			}
		}
	}

	return OK;
}
/* Split hook: 'vr' is being divided into 'r1' (low part) and 'r2'
 * (high part). Both halves inherit vr's file parameters and each
 * takes its own reference on the shared fdref.
 *
 * r1 no longer contains the end of the mapping, so its clearend is
 * reset; r2 starts r1->length further into the file.
 */
static void mappedfile_split(struct vmproc *vmp, struct vir_region *vr,
	struct vir_region *r1, struct vir_region *r2)
{
	assert(!r1->param.file.inited);
	assert(!r2->param.file.inited);
	assert(vr->param.file.inited);
	assert(r1->length + r2->length == vr->length);
	assert(vr->def_memtype == &mem_type_mappedfile);
	assert(r1->def_memtype == &mem_type_mappedfile);
	assert(r2->def_memtype == &mem_type_mappedfile);

	/* Low half: same file and offset, but no tail to clear. */
	r1->param.file = vr->param.file;
	r1->param.file.clearend = 0;
	fdref_ref(vr->param.file.fdref, r1);

	/* High half: same file, starting where the low half ends. */
	r2->param.file = vr->param.file;
	r2->param.file.offset += r1->length;
	fdref_ref(vr->param.file.fdref, r2);

	assert(r1->param.file.inited);
	assert(r2->param.file.inited);
}
/* Low-shrink hook: 'len' bytes are taken off the start of the region,
 * so the region's file offset moves forward by as much.
 * Always returns OK.
 */
static int mappedfile_lowshrink(struct vir_region *vr, vir_bytes len)
{
	assert(vr->param.file.inited);

	vr->param.file.offset = vr->param.file.offset + len;

	return OK;
}
/* Delete hook: the region goes away, so drop its reference on the
* fdref (which may close the fd, see fdref_deref()) and mark the
* file parameters as no longer initialized.
*/
static void mappedfile_delete(struct vir_region *region)
{
assert(region->def_memtype == &mem_type_mappedfile);
assert(region->param.file.inited);
assert(region->param.file.fdref);
fdref_deref(region);
region->param.file.inited = 0;
}

View file

@ -81,6 +81,127 @@ static struct vir_region *mmap_region(struct vmproc *vmp, vir_bytes addr,
return vr; return vr;
} }
/* Create a file mapping in 'vmp' once the file behind it is known.
 *
 * vmfd, ino, dev	fd and identity of the file to map
 * off_lo, off_hi	file offset; only off_lo is used for
 *			MAP_THIRDPARTY requests
 * addr, len, flags	placement, as passed on to mmap_region()
 * filesize		file size; only checked for page alignment
 * clearend		bytes at the end of the mapping that must be
 *			zero, recorded in the region
 * writable		nonzero to create the region VR_WRITABLE
 * mayclosefd		whether 'vmfd' may be closed if it turns out
 *			to duplicate an fd that is already held
 *
 * The offset is rounded down and the length up to page boundaries.
 * On success *retaddr is the address that corresponds to the
 * requested (unrounded) offset.
 *
 * Returns OK, or ENOMEM if no region could be created.
 */
static int mmap_file(struct vmproc *vmp,
	int vmfd, u32_t off_lo, u32_t off_hi, int flags,
	ino_t ino, dev_t dev, u64_t filesize, vir_bytes addr, vir_bytes len,
	vir_bytes *retaddr, u16_t clearend, int writable, int mayclosefd)
{
	struct vir_region *vr;
	u64_t file_offset, page_offset;
	u32_t vrflags = writable ? VR_WRITABLE : 0;

	if(flags & MAP_THIRDPARTY) {
		file_offset = off_lo;
	} else if(off_hi && !off_lo) {
		/* XXX clang compatibility hack */
		off_hi = 0;
		file_offset = 0;
	} else {
		file_offset = make64(off_lo, off_hi);
	}

	/* Start the mapping on the page that contains the offset, and
	 * lengthen it by the distance the start moved down.
	 */
	page_offset = file_offset % VM_PAGE_SIZE;
	file_offset -= page_offset;
	len += page_offset;

	len = roundup(len, VM_PAGE_SIZE);

	/* All numbers should be page-aligned now. */
	assert(!(len % VM_PAGE_SIZE));
	assert(!(filesize % VM_PAGE_SIZE));
	assert(!(file_offset % VM_PAGE_SIZE));

#if 0
	/* XXX ld.so relies on longer-than-file mapping */
	if((u64_t) len + file_offset > filesize) {
		printf("VM: truncating mmap dev 0x%x ino %d beyond file size in %d; offset %llu, len %lu, size %llu; ",
			dev, ino, vmp->vm_endpoint,
			file_offset, len, filesize);
		len = filesize - file_offset;
		return EINVAL;
	}
#endif

	vr = mmap_region(vmp, addr, flags, len,
		vrflags, &mem_type_mappedfile, 0);
	if(!vr)
		return ENOMEM;

	/* Report where the requested offset ended up, not the page start. */
	*retaddr = vr->vaddr + page_offset;

	mappedfile_setfile(vmp, vr, vmfd,
		file_offset, dev, ino, clearend, 1, mayclosefd);

	return OK;
}
/* VM_VFS_MMAP: VFS asks for a file to be mapped into a process.
 *
 * The mapping is private and at a fixed address, all taken from the
 * request message. The flag bits of clearend_and_flags are handed to
 * mmap_file() as its 'writable' argument, the length bits as
 * 'clearend'. The fd is never closed on behalf of this call.
 *
 * Returns ENXIO if file mapping is disabled, otherwise the result of
 * mmap_file(). Panics if VFS names an invalid endpoint.
 */
int do_vfs_mmap(message *m)
{
	struct vmproc *target;
	vir_bytes mapped_at;
	u16_t clearend, flags;
	int slot, r;

	/* It might be disabled */
	if(!enable_filemap)
		return ENXIO;

	/* One field carries both the clearend length and the flags. */
	clearend = (m->m_u.m_vm_vfs.clearend_and_flags & MVM_LENMASK);
	flags = (m->m_u.m_vm_vfs.clearend_and_flags & MVM_FLAGSMASK);

	r = vm_isokendpt(m->m_u.m_vm_vfs.who, &slot);
	if(r != OK)
		panic("bad ep %d from vfs", m->m_u.m_vm_vfs.who);

	target = &vmproc[slot];

	return mmap_file(target, m->m_u.m_vm_vfs.fd, m->m_u.m_vm_vfs.offset, 0,
		MAP_PRIVATE | MAP_FIXED,
		m->m_u.m_vm_vfs.ino, m->m_u.m_vm_vfs.dev,
		(u64_t) LONG_MAX * VM_PAGE_SIZE,
		m->m_u.m_vm_vfs.vaddr, m->m_u.m_vm_vfs.len, &mapped_at,
		clearend, flags, 0);
}
/* Continuation of a file mmap() request, called when VFS has replied
 * to the VMVFSREQ_FDLOOKUP that do_mmap() queued.
 *
 * vmp		the process that called mmap(), or NULL if it no
 *		longer exists (see do_vfs_reply())
 * replymsg	the reply from VFS: result, fd, inode, device, size
 * cbarg	unused
 * origmsg_v	saved copy of the original mmap() request message
 *
 * On a successful lookup the mapping is created with mmap_file();
 * either way the requesting process is unblocked with the result.
 */
static void mmap_file_cont(struct vmproc *vmp, message *replymsg, void *cbarg,
	void *origmsg_v)
{
	message *origmsg = (message *) origmsg_v;
	message mmap_reply;
	int result;
	int writable = 0;
	vir_bytes v = (vir_bytes) MAP_FAILED;

	/* The requester may have gone while the request was pending;
	 * then there is nothing to map into and nobody to reply to.
	 * NOTE(review): an fd that VFS handed out in this reply is not
	 * closed on this path.
	 */
	if(vmp == NULL) {
		printf("VM: mmap_file_cont: requesting process is gone\n");
		return;
	}

	if(origmsg->VMM_PROT & PROT_WRITE)
		writable = 1;

	if(replymsg->VMV_RESULT != OK) {
		printf("VM: VFS reply failed (%d)\n", replymsg->VMV_RESULT);
		sys_sysctl_stacktrace(vmp->vm_endpoint);
		/* The error is in the reply, not in the saved request. */
		result = replymsg->VMV_RESULT;
	} else {
		/* Finish mmap */
		result = mmap_file(vmp, replymsg->VMV_FD, origmsg->VMM_OFFSET_LO,
			origmsg->VMM_OFFSET_HI, origmsg->VMM_FLAGS,
			replymsg->VMV_INO, replymsg->VMV_DEV,
			(u64_t) replymsg->VMV_SIZE_PAGES*PAGE_SIZE,
			origmsg->VMM_ADDR,
			origmsg->VMM_LEN, &v, 0, writable, 1);
	}

	/* Unblock requesting process. */
	memset(&mmap_reply, 0, sizeof(mmap_reply));
	mmap_reply.m_type = result;
	mmap_reply.VMM_ADDR = v;

	if(send(vmp->vm_endpoint, &mmap_reply) != OK)
		panic("VM: mmap_file_cont: send() failed");
}
/*===========================================================================* /*===========================================================================*
* do_mmap * * do_mmap *
*===========================================================================*/ *===========================================================================*/
@ -111,11 +232,16 @@ int do_mmap(message *m)
vmp = &vmproc[n]; vmp = &vmproc[n];
/* "SUSv3 specifies that mmap() should fail if length is 0" */
if(len <= 0) {
return EINVAL;
}
if(m->VMM_FD == -1 || (m->VMM_FLAGS & MAP_ANON)) { if(m->VMM_FD == -1 || (m->VMM_FLAGS & MAP_ANON)) {
/* actual memory in some form */ /* actual memory in some form */
mem_type_t *mt = NULL; mem_type_t *mt = NULL;
if(m->VMM_FD != -1 || len <= 0) { if(m->VMM_FD != -1) {
printf("VM: mmap: fd %d, len 0x%x\n", m->VMM_FD, len); printf("VM: mmap: fd %d, len 0x%x\n", m->VMM_FD, len);
return EINVAL; return EINVAL;
} }
@ -134,9 +260,25 @@ int do_mmap(message *m)
return ENOMEM; return ENOMEM;
} }
} else { } else {
/* File mapping might be disabled */
if(!enable_filemap) return ENXIO;
/* files get private copies of pages on writes. */
if(!(m->VMM_FLAGS & MAP_PRIVATE)) {
printf("VM: mmap file must MAP_PRIVATE\n");
return ENXIO; return ENXIO;
} }
if(vfs_request(VMVFSREQ_FDLOOKUP, m->VMM_FD, vmp, 0, 0,
mmap_file_cont, NULL, m, sizeof(*m)) != OK) {
printf("VM: vfs_request for mmap failed\n");
return ENXIO;
}
/* request queued; don't reply. */
return SUSPEND;
}
/* Return mapping, as seen from process. */ /* Return mapping, as seen from process. */
m->VMM_RETADDR = vr->vaddr; m->VMM_RETADDR = vr->vaddr;

View file

@ -228,5 +228,14 @@ int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset,
int do_vfs_reply(message *m); int do_vfs_reply(message *m);
/* mem_file.c */ /* mem_file.c */
void mappedfile_setfile(struct vir_region *region, int fd, u64_t offset, int mappedfile_setfile(struct vmproc *owner, struct vir_region *region,
dev_t dev, ino_t ino, u16_t clearend, int prefill); int fd, u64_t offset,
dev_t dev, ino_t ino, u16_t clearend, int prefill, int mayclose);
/* fdref.c */
struct fdref *fdref_new(struct vmproc *owner, ino_t ino, dev_t dev, int fd);
struct fdref *fdref_dedup_or_new(struct vmproc *owner, ino_t ino, dev_t dev,
int fd, int mayclose);
void fdref_ref(struct fdref *ref, struct vir_region *region);
void fdref_deref(struct vir_region *region);
void fdref_sanitycheck(void);

View file

@ -833,6 +833,8 @@ struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr)
if(!(newvr = region_new(vr->parent, vr->vaddr, vr->length, vr->flags, vr->def_memtype))) if(!(newvr = region_new(vr->parent, vr->vaddr, vr->length, vr->flags, vr->def_memtype)))
return NULL; return NULL;
USE(newvr, newvr->parent = vmp;);
if(vr->def_memtype->ev_copy && (r=vr->def_memtype->ev_copy(vr, newvr)) != OK) { if(vr->def_memtype->ev_copy && (r=vr->def_memtype->ev_copy(vr, newvr)) != OK) {
map_free(newvr); map_free(newvr);
printf("VM: memtype-specific copy failed (%d)\n", r); printf("VM: memtype-specific copy failed (%d)\n", r);
@ -980,7 +982,6 @@ struct vir_region *start_src_vr;
map_free_proc(dst); map_free_proc(dst);
return ENOMEM; return ENOMEM;
} }
USE(newvr, newvr->parent = dst;);
region_insert(&dst->vm_regions_avl, newvr); region_insert(&dst->vm_regions_avl, newvr);
assert(vr->length == newvr->length); assert(vr->length == newvr->length);

View file

@ -19,6 +19,7 @@
#include "phys_region.h" #include "phys_region.h"
#include "memtype.h" #include "memtype.h"
#include "vm.h" #include "vm.h"
#include "fdref.h"
struct phys_block { struct phys_block {
#if SANITYCHECKS #if SANITYCHECKS
@ -53,11 +54,9 @@ typedef struct vir_region {
} shared; } shared;
struct phys_block *pb_cache; struct phys_block *pb_cache;
struct { struct {
int procfd; /* cloned fd in proc for mmap */
dev_t dev;
ino_t ino;
u64_t offset;
int inited; int inited;
struct fdref *fdref;
u64_t offset;
u16_t clearend; u16_t clearend;
} file; } file;
} param; } param;

144
servers/vm/vfs.c Normal file
View file

@ -0,0 +1,144 @@
/* Sending requests to VFS and handling the replies. */
#define _SYSTEM 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/type.h>
#include <minix/bitmap.h>
#include <string.h>
#include <errno.h>
#include <env.h>
#include <unistd.h>
#include <assert.h>
#include <sys/param.h>
#include "proto.h"
#include "glo.h"
#include "util.h"
#include "region.h"
#include "sanitycheck.h"
/* Maximum number of bytes of caller state that can be saved with a
* request; vfs_request() asserts that callers stay within it.
*/
#define STATELEN 50
/* One request to VFS, from the moment vfs_request() creates it until
* do_vfs_reply() has handled the reply and frees it.
*
* first_queued heads the list of requests that have not been sent
* yet; active is the single request that has been sent to VFS and
* whose reply is awaited (NULL if there is none).
*/
static struct vfs_request_node {
message reqmsg; /* the VFS_VMCALL message that is sent to VFS */
char reqstate[STATELEN]; /* copy of the caller's state, passed to callback */
void *opaque; /* caller's cbarg, passed to callback */
endpoint_t who; /* endpoint the request is made for */
u32_t req_id; /* id, checked against the id in the reply */
vfs_callback_t callback; /* called on reply; may be NULL */
struct vfs_request_node *next; /* next request in the queue */
} *first_queued, *active;
static void activate(void)
{
assert(!active);
assert(first_queued);
active = first_queued;
first_queued = first_queued->next;
if(asynsend3(VFS_PROC_NR, &active->reqmsg, AMF_NOREPLY) != OK)
panic("VM: asynsend to VFS failed");
}
/*===========================================================================*
 *				vfs_request				     *
 *===========================================================================*/
int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset, u32_t len,
	vfs_callback_t reply_callback, void *cbarg, void *state, int statelen)
{
/* Perform an asynchronous request to VFS.
 * We send a message of type VFS_VMCALL to VFS. VFS will respond
 * with message type VM_VFS_REPLY. We send the request asynchronously
 * and then handle the reply as if it were a VM_VFS_REPLY request.
 *
 * The request is put at the front of the queue and is sent right away
 * if no other request is outstanding. 'reply_callback' (may be NULL)
 * is called with 'cbarg' and a copy of the 'statelen' bytes at
 * 'state' (may be NULL) when the reply comes in.
 *
 * Returns OK if the request was queued, ENOMEM if there was no memory
 * for it.
 */
	static u32_t reqid = 0;
	struct vfs_request_node *node;
	message *req;

	reqid++;

	assert(statelen <= STATELEN);

	if(!SLABALLOC(node)) {
		printf("vfs_request: no memory for request node\n");
		return ENOMEM;
	}

	/* The message that goes to VFS. */
	req = &node->reqmsg;
	req->m_type = VFS_VMCALL;
	req->VFS_VMCALL_REQ = reqno;
	req->VFS_VMCALL_FD = fd;
	req->VFS_VMCALL_REQID = reqid;
	req->VFS_VMCALL_ENDPOINT = vmp->vm_endpoint;
	req->VFS_VMCALL_OFFSET_LO = ex64lo(offset);
	req->VFS_VMCALL_OFFSET_HI = ex64hi(offset);
	req->VFS_VMCALL_LENGTH = len;

	/* What we need to remember to handle the reply. */
	node->who = vmp->vm_endpoint;
	node->req_id = reqid;
	node->callback = reply_callback;
	node->opaque = cbarg;
	if(state != NULL)
		memcpy(node->reqstate, state, statelen);

	/* Queue it, at the front. */
	node->next = first_queued;
	first_queued = node;

	/* Send the request message if none pending. */
	if(active == NULL)
		activate();

	return OK;
}
/*===========================================================================*
 *				do_vfs_reply				     *
 *===========================================================================*/
int do_vfs_reply(message *m)
{
/* VFS has handled a VM request and VFS has replied. It must be the
 * active request.
 *
 * The callback registered with the request, if any, is called with
 * the process the request was made for (NULL if that process no
 * longer exists), the reply, and the argument and state saved with
 * the request. After that the next queued request, if any, is sent.
 *
 * Returns ENOSYS if the message is not from VFS, SUSPEND otherwise,
 * as a reply must not itself be replied to.
 */
	struct vfs_request_node *orignode = active;
	vfs_callback_t req_callback;
	void *cbarg;
	int n;
	struct vmproc *vmp;

	if(m->m_source != VFS_PROC_NR)
		return ENOSYS;

	assert(active);
	assert(active->req_id == m->VMV_REQID);

	/* the endpoint may have exited */
	if(vm_isokendpt(m->VMV_ENDPOINT, &n) != OK)
		vmp = NULL;
	else	vmp = &vmproc[n];

	req_callback = active->callback;
	cbarg = active->opaque;
	active = NULL;

	/* Invoke requested reply-callback within VM. */
	if(req_callback) req_callback(vmp, m, cbarg, orignode->reqstate);

	SLABFREE(orignode);

	/* Send the next request message if any. The callback may have
	 * made a request of its own, and with no request active at that
	 * point, that request has been sent already; then there is
	 * nothing to activate here and the rest of the queue waits.
	 */
	if(first_queued && !active)
		activate();

	return SUSPEND;	/* don't reply to the reply */
}

View file

@ -34,6 +34,7 @@ OBJS.test57= test57loop.o
# Cache testing programs # Cache testing programs
OBJS.test71+= testcache.o OBJS.test71+= testcache.o
OBJS.test72+= testcache.o OBJS.test72+= testcache.o
OBJS.test74+= testcache.o
LDADD.test72+= -lminixfs LDADD.test72+= -lminixfs
PROGS += testvm PROGS += testvm
@ -47,7 +48,7 @@ MINIX_TESTS= \
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \ 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
41 42 43 44 45 46 48 49 50 52 53 54 55 56 58 59 60 \ 41 42 43 44 45 46 48 49 50 52 53 54 55 56 58 59 60 \
61 64 65 66 67 68 69 70 71 72 73 61 64 65 66 67 68 69 70 71 72 73 74
.if ${MACHINE_ARCH} == "i386" .if ${MACHINE_ARCH} == "i386"
MINIX_TESTS+= \ MINIX_TESTS+= \

View file

@ -13,8 +13,7 @@
#include "common.h" #include "common.h"
int common_test_nr = -1, errct = 0, subtest; int common_test_nr = -1, errct = 0, subtest;
int quietflag = 1, bigflag = 0;
int quietflag = 1;
/* provide a default max_error symbol as Max_error with a value /* provide a default max_error symbol as Max_error with a value
* of 5. The test program can override it wit its own max_error * of 5. The test program can override it wit its own max_error
@ -30,6 +29,11 @@ int test_nr;
char buf[64]; char buf[64];
int i; int i;
/* if this variable is set, specify to tests we are running
* in 'overnight' mode
*/
bigflag = !!getenv(BIGVARNAME);
common_test_nr = test_nr; common_test_nr = test_nr;
printf("Test %2d ", test_nr); printf("Test %2d ", test_nr);
fflush(stdout); /* since stdout is probably line buffered */ fflush(stdout); /* since stdout is probably line buffered */

View file

@ -28,6 +28,11 @@ alltests=" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
sh1.sh sh2.sh interp.sh" sh1.sh sh2.sh interp.sh"
tests_no=`expr 0` tests_no=`expr 0`
# test mmap only if enabled in sysenv
if sysenv filemap >/dev/null
then alltests="$alltests 74"
fi
# If root, make sure the setuid tests have the correct permissions # If root, make sure the setuid tests have the correct permissions
# and make the dir bin-owned. # and make the dir bin-owned.
if [ "$ROOT" ] if [ "$ROOT" ]

View file

@ -23,32 +23,6 @@
#include "common.h" #include "common.h"
#include "testcache.h" #include "testcache.h"
/* we want to flexibly split this test over multiple files
* - for big working sets we might run over the 2GB MFS file limit
* - we might want to test the FS being able to handle lots of
* files / unusual metadata situations
*/
#define MBPERFILE 100
#define MB (1024*1024)
#define MAXFILES ((u64_t) MAXBLOCKS * MAXBLOCKSIZE / MB / MBPERFILE + 1)
static int fds[MAXFILES];
static void
get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd)
{
u64_t offset = (u64_t) b * blocksize;
int filenumber;
filenumber = offset / MB / MBPERFILE;
assert(filenumber >= 0 && filenumber < MAXFILES);
assert(fds[filenumber] > 0);
*fd = fds[filenumber];
*file_offset = offset - (filenumber * MBPERFILE * MB);
}
int int
dowriteblock(int b, int blocksize, u32_t seed, char *data) dowriteblock(int b, int blocksize, u32_t seed, char *data)
{ {
@ -86,19 +60,14 @@ void testend(void) { }
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
int f, big = !!getenv(BIGVARNAME), iter = 2; int iter = 2;
start(71); start(71);
cachequiet(!big); cachequiet(!bigflag);
if(big) iter = 3; if(bigflag) iter = 3;
for(f = 0; f < MAXFILES; f++) { makefiles(MAXFILES);
char tempfilename[] = "cachetest.XXXXXXXX";
fds[f] = mkstemp(tempfilename);
if(fds[f] < 0) { perror("mkstemp"); e(20); return 1; }
assert(fds[f] > 0);
}
/* Try various combinations working set sizes /* Try various combinations working set sizes
* and block sizes in order to specifically * and block sizes in order to specifically
@ -112,18 +81,13 @@ main(int argc, char *argv[])
if(dotest(PAGE_SIZE*3, 100, iter)) e(3); if(dotest(PAGE_SIZE*3, 100, iter)) e(3);
if(dotest(PAGE_SIZE, 20000, iter)) e(5); if(dotest(PAGE_SIZE, 20000, iter)) e(5);
if(big) { if(bigflag) {
u32_t totalmem, freemem, cachedmem; u32_t totalmem, freemem, cachedmem;
if(dotest(PAGE_SIZE, 150000, iter)) e(5); if(dotest(PAGE_SIZE, 150000, iter)) e(5);
getmem(&totalmem, &freemem, &cachedmem); getmem(&totalmem, &freemem, &cachedmem);
if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6); if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6);
} }
for(f = 0; f < MAXFILES; f++) {
assert(fds[f] > 0);
close(fds[f]);
}
quit(); quit();
return 0; return 0;

113
test/test74.c Normal file
View file

@ -0,0 +1,113 @@
/* Test 74 - mmap functionality test.
*/
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioc_memory.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include "common.h"
#include "testcache.h"
/* Write block number 'b' of 'blocksize' bytes from 'data' to its place
 * in the test files, using pwrite(). 'seed' is not used.
 *
 * Returns blocksize, or -1 if less than a whole block was written.
 */
int
dowriteblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t file_pos;
	int filedes;

	/* Which file, and where in it, does this block live? */
	get_fd_offset(b, blocksize, &file_pos, &filedes);

	if(pwrite(filedes, data, blocksize, file_pos) >= blocksize)
		return blocksize;

	perror("pwrite");
	return -1;
}
/* Read block number 'b' of 'blocksize' bytes into 'data' with pread(),
 * map the same block with mmap(), and verify that both views hold the
 * same contents. The order of pread() and mmap() is picked at random
 * on every call. 'seed' is not used.
 *
 * The mapping is removed again on every path that created it.
 *
 * Returns blocksize, or -1 on any failure or mismatch.
 */
int
readblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;
	char *mmapdata;
	int result = -1;
	int pread_first = random() % 2;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if((mmapdata = minix_mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
		fd, offset)) == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	/* From here on there is a mapping to get rid of again. */

	if(!pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			goto unmap;
		}
	}

	if(memcmp(mmapdata, data, blocksize)) {
		fprintf(stderr, "readblock: mmap, pread mismatch\n");
		goto unmap;
	}

	result = blocksize;

unmap:
	if(minix_munmap(mmapdata, blocksize) < 0) {
		perror("munmap");
		return -1;
	}

	return result;
}
/* Hook called at the end of a test; this test has nothing to do. */
void
testend(void)
{
}
/* Test 74: run the shared block test driver, dotest(blocksize, nblocks,
* iterations), with this file's dowriteblock()/readblock(), so that
* every block read is cross-checked between pread() and mmap().
*
* With bigflag set, one more iteration is done and two larger working
* sets are added, the last one sized from the memory reported by
* getmem().
*/
int
main(int argc, char *argv[])
{
int iter = 2;
start(74);
/* Create the files that the blocks are spread over. */
makefiles(MAXFILES);
cachequiet(!bigflag);
if(bigflag) iter = 3;
/* Try various combinations working set sizes
* and block sizes in order to specifically
* target the primary cache, then primary+secondary
* cache, then primary+secondary cache+secondary
* cache eviction.
*/
/* Note that error number 5 is reported by several of the cases. */
if(dotest(PAGE_SIZE, 100, iter)) e(5);
if(dotest(PAGE_SIZE*2, 100, iter)) e(2);
if(dotest(PAGE_SIZE*3, 100, iter)) e(3);
if(dotest(PAGE_SIZE, 20000, iter)) e(5);
if(bigflag) {
u32_t totalmem, freemem, cachedmem;
if(dotest(PAGE_SIZE, 150000, iter)) e(5);
getmem(&totalmem, &freemem, &cachedmem);
/* Working set of one and a half times the reported total memory. */
if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6);
}
quit();
return 0;
}

View file

@ -23,6 +23,8 @@
extern int quietflag; extern int quietflag;
int fds[MAXFILES];
static void static void
genblock(int b, char *blockdata, int blocksize, u32_t seed) genblock(int b, char *blockdata, int blocksize, u32_t seed)
{ {
@ -210,6 +212,37 @@ dotest(int blocksize, int nblocks, int iterations)
return 0; return 0;
} }
/* Translate block number 'b' (blocks of 'blocksize' bytes) into the
 * test file that holds it and the offset within that file.
 *
 * The blocks form one long byte range that is cut into files of
 * MBPERFILE megabytes each. On return *fd is the descriptor of the
 * file and *file_offset the byte offset of the block in it.
 */
void
get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd)
{
	u64_t offset = (u64_t) b * blocksize;
	int filenumber;

	filenumber = offset / MB / MBPERFILE;

	assert(filenumber >= 0 && filenumber < MAXFILES);
	assert(fds[filenumber] > 0);

	*fd = fds[filenumber];

	/* Compute the start of the file in 64 bits: as an int product,
	 * filenumber * MBPERFILE * MB overflows from the third file on.
	 */
	*file_offset = offset - ((u64_t) filenumber * MBPERFILE * MB);
}
/* Create 'n' temporary files with mkstemp() and store their
 * descriptors in fds[0..n-1]. Exits the program with status 1, after
 * printing a diagnostic, if a file cannot be created.
 */
void
makefiles(int n)
{
	int f = 0;

	while(f < n) {
		/* mkstemp() fills in the template, so use a fresh one. */
		char name_template[] = "cachetest.XXXXXXXX";

		fds[f] = mkstemp(name_template);
		if(fds[f] < 0) {
			perror("mkstemp");
			fprintf(stderr, "mkstemp %d/%d failed\n", f, n);
			exit(1);
		}
		assert(fds[f] > 0);

		f++;
	}
}
void cachequiet(int quiet) void cachequiet(int quiet)
{ {
quietflag = quiet; quietflag = quiet;

View file

@ -14,5 +14,21 @@ int readblock(int b, int blocksize, u32_t seed, char *block);
void testend(void); void testend(void);
int dotest(int blocksize, int nblocks, int iterations); int dotest(int blocksize, int nblocks, int iterations);
void cachequiet(int quiet); void cachequiet(int quiet);
void get_fd_offset(int b, int blocksize, u64_t *file_offset, int *fd);
void makefiles(int n);
#define OK_BLOCK_GONE -999 #define OK_BLOCK_GONE -999
/* for file-oriented tests:
*
* we want to flexibly split tests over multiple files
* - for big working sets we might run over the 2GB MFS file limit
* - we might want to test the FS being able to handle lots of
* files / unusual metadata situations
*/
#define MBPERFILE 2000
#define MB (1024*1024)
#define MAXFILES ((u64_t) MAXBLOCKS * MAXBLOCKSIZE / MB / MBPERFILE + 1)
extern int fds[MAXFILES], bigflag;