secondary cache feature in vm.

A new call to vm lets processes yield a part of their memory to vm,
together with an id, getting newly allocated memory in return. vm is
allowed to forget the yielded memory if it runs out of memory. Processes
can ask for the yielded memory back using the same id. (These two
operations are normally combined in a single call.)

It can be used as an as-big-as-memory-will-allow block cache for
filesystems, which is how mfs now uses it.
This commit is contained in:
Ben Gras 2010-05-05 11:35:04 +00:00
parent 51d46f8e46
commit f78d8e74fd
37 changed files with 1060 additions and 290 deletions

View file

@ -957,6 +957,23 @@
#define VM_MUNMAP_TEXT (VM_RQ_BASE+19)
/* To VM: forget all my yielded blocks. */
#define VM_FORGETBLOCKS (VM_RQ_BASE+22)
/* To VM: forget this block. */
#define VM_FORGETBLOCK (VM_RQ_BASE+23)
#define VMFB_IDHI m1_i1
#define VMFB_IDLO m1_i2
/* To VM: combined yield+get call. */
#define VM_YIELDBLOCKGETBLOCK (VM_RQ_BASE+25)
#define VMYBGB_VADDR m2_p1
#define VMYBGB_GETIDHI m2_i1
#define VMYBGB_GETIDLO m2_i2
#define VMYBGB_LEN m2_i3
#define VMYBGB_YIELDIDHI m2_l1
#define VMYBGB_YIELDIDLO m2_l2
/* Calls from VFS. */
# define VMV_ENDPOINT m1_i1 /* for all VM_VFS_REPLY_* */
#define VM_VFS_REPLY_OPEN (VM_RQ_BASE+30)
@ -1025,7 +1042,8 @@
/* Basic vm calls allowed to every process. */
#define VM_BASIC_CALLS \
VM_MMAP, VM_MUNMAP, VM_MUNMAP_TEXT, VM_MAP_PHYS, VM_UNMAP_PHYS
VM_MMAP, VM_MUNMAP, VM_MUNMAP_TEXT, VM_MAP_PHYS, VM_UNMAP_PHYS, \
VM_FORGETBLOCKS, VM_FORGETBLOCK, VM_YIELDBLOCKGETBLOCK
/*===========================================================================*
* Messages for IPC server *

View file

@ -27,6 +27,15 @@ _PROTOTYPE( int vm_ctl, (int what, int param));
_PROTOTYPE( int vm_set_priv, (int procnr, void *buf));
_PROTOTYPE( int vm_update, (endpoint_t src_e, endpoint_t dst_e));
_PROTOTYPE( int vm_query_exit, (int *endpt));
_PROTOTYPE( int vm_forgetblock, (u64_t id));
_PROTOTYPE( void vm_forgetblocks, (void));
_PROTOTYPE( int vm_yield_block_get_block, (u64_t yieldid, u64_t getid,
void *mem, vir_bytes len));
/* Invalid ID with special meaning for the vm_yield_block_get_block
* interface.
*/
#define VM_BLOCKID_NONE make64(ULONG_MAX, ULONG_MAX)
/* VM kernel request types. */
#define VMPTYPE_NONE 0

View file

@ -95,6 +95,7 @@ SRCS= \
vm_map_phys.c \
vm_umap.c \
vm_push_sig.c \
vm_yield_get_block.c \
asynsend.c \
kprintf.c \
kputc.c \

View file

@ -0,0 +1,47 @@
#include "syslib.h"
#include <minix/vm.h>
#include <minix/u64.h>
/*===========================================================================*
* vm_forgetblocks *
*===========================================================================*/
PUBLIC void vm_forgetblocks(void)
{
/* Send a VM_FORGETBLOCKS request to VM. No request fields are filled in,
 * and the status returned by _taskcall() is not inspected because this
 * function has nothing to return.
 */
	message req;

	(void) _taskcall(VM_PROC_NR, VM_FORGETBLOCKS, &req);
}
/*===========================================================================*
* vm_forgetblock *
*===========================================================================*/
PUBLIC int vm_forgetblock(u64_t id)
{
/* Send a VM_FORGETBLOCK request to VM for the block identified by 'id'.
 * The 64-bit id travels as two message fields (low and high half).
 * Returns whatever _taskcall() returns.
 */
	message req;
	int status;

	req.VMFB_IDLO = ex64lo(id);
	req.VMFB_IDHI = ex64hi(id);

	status = _taskcall(VM_PROC_NR, VM_FORGETBLOCK, &req);

	return status;
}
/*===========================================================================*
* vm_yield_block_get_block *
*===========================================================================*/
PUBLIC int vm_yield_block_get_block(u64_t yieldid, u64_t getid,
	void *mem, vir_bytes len)
{
/* Send a combined VM_YIELDBLOCKGETBLOCK request to VM.
 *
 *   yieldid  id sent in the 'yield' fields of the request
 *   getid    id sent in the 'get' fields of the request
 *   mem      address sent as the request's virtual address
 *   len      length sent along with that address
 *
 * Each 64-bit id is split into a high and a low half to fit the message.
 * Returns whatever _taskcall() returns.
 */
	message req;
	int status;

	/* The memory range the request is about. */
	req.VMYBGB_VADDR = mem;
	req.VMYBGB_LEN = len;

	/* Id halves for the 'yield' part. */
	req.VMYBGB_YIELDIDHI = ex64hi(yieldid);
	req.VMYBGB_YIELDIDLO = ex64lo(yieldid);

	/* Id halves for the 'get' part. */
	req.VMYBGB_GETIDHI = ex64hi(getid);
	req.VMYBGB_GETIDLO = ex64lo(getid);

	status = _taskcall(VM_PROC_NR, VM_YIELDBLOCKGETBLOCK, &req);

	return status;
}

View file

@ -12,7 +12,7 @@ MAN=
BINDIR?= /sbin
NR_BUFS= 1024
CPPFLAGS+= -DNR_BUFS=${NR_BUFS}
DEFAULT_NR_BUFS= 1024
CPPFLAGS+= -DDEFAULT_NR_BUFS=${DEFAULT_NR_BUFS}
.include <minix.prog.mk>

View file

@ -43,7 +43,7 @@ union fsdata_u {
#define b_v2_ino bp->b__v2_ino
#define b_bitmap bp->b__bitmap
#define BUFHASH(b) ((b) % NR_BUFS)
#define BUFHASH(b) ((b) % nr_bufs)
EXTERN struct buf *front; /* points to least recently used free block */
EXTERN struct buf *rear; /* points to most recently used free block */

View file

@ -18,6 +18,8 @@
#include <minix/com.h>
#include <minix/u64.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "buf.h"
#include "super.h"
#include "inode.h"
@ -25,6 +27,8 @@
FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
FORWARD _PROTOTYPE( int rw_block, (struct buf *, int) );
PRIVATE int vmcache_avail = -1; /* 0 if not available, >0 if available. */
/*===========================================================================*
* get_block *
*===========================================================================*/
@ -51,6 +55,28 @@ PUBLIC struct buf *get_block(
int b;
static struct buf *bp, *prev_ptr;
int vmfound = 0;
u64_t yieldid = VM_BLOCKID_NONE, getid = make64(dev, block);
int vmcache = 0;
assert(buf_hash);
assert(buf);
assert(nr_bufs > 0);
if(vmcache_avail < 0) {
/* Test once for the availability of the vm yield block feature. */
if(vm_forgetblock(VM_BLOCKID_NONE) == ENOSYS) {
vmcache_avail = 0;
} else {
vmcache_avail = 1;
}
}
/* use vmcache if it's available, and allowed, and we're not doing
* i/o on a ram disk device.
*/
if(vmcache_avail && may_use_vmcache && major(dev) != MEMORY_MAJOR)
vmcache = 1;
ASSERT(fs_block_size > 0);
@ -80,7 +106,7 @@ PUBLIC struct buf *get_block(
}
/* Desired block is not on available chain. Take oldest block ('front'). */
if ((bp = front) == NIL_BUF) panic("all buffers in use: %d", NR_BUFS);
if ((bp = front) == NIL_BUF) panic("all buffers in use: %d", nr_bufs);
if(bp->b_bytes < fs_block_size) {
ASSERT(!bp->bp);
@ -126,6 +152,13 @@ PUBLIC struct buf *get_block(
*/
if (bp->b_dev != NO_DEV) {
if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
/* Are we throwing out a block that contained something?
* Give it to VM for the second-layer cache.
*/
yieldid = make64(bp->b_dev, bp->b_blocknr);
assert(bp->b_bytes == fs_block_size);
bp->b_dev = NO_DEV;
}
/* Fill in block's parameters and add it to the hash chain where it goes. */
@ -137,16 +170,47 @@ PUBLIC struct buf *get_block(
buf_hash[b] = bp; /* add to hash list */
if(dev == NO_DEV) {
if(vmcache && cmp64(yieldid, VM_BLOCKID_NONE) != 0) {
vm_yield_block_get_block(yieldid, VM_BLOCKID_NONE,
bp->bp, fs_block_size);
}
return(bp); /* If the caller wanted a NO_DEV block, work is done. */
}
/* Go get the requested block unless searching or prefetching. */
if (dev != NO_DEV) {
if (only_search == PREFETCH) bp->b_dev = NO_DEV;
else
if (only_search == NORMAL) {
rw_block(bp, READING);
if(only_search == PREFETCH || only_search == NORMAL) {
/* Block is not found in our cache, but we do want it
* if it's in the vm cache.
*/
if(vmcache) {
/* If we can satisfy the PREFETCH or NORMAL request
* from the vm cache, work is done.
*/
if(vm_yield_block_get_block(yieldid, getid,
bp->bp, fs_block_size) == OK) {
return bp;
}
}
}
ASSERT(bp->bp);
if(only_search == PREFETCH) {
/* PREFETCH: don't do i/o. */
bp->b_dev = NO_DEV;
} else if (only_search == NORMAL) {
rw_block(bp, READING);
} else if(only_search == NO_READ) {
/* we want this block, but its contents
* will be overwritten. VM has to forget
* about it.
*/
if(vmcache) {
vm_forgetblock(getid);
}
} else
panic("unexpected only_search value: %d", only_search);
assert(bp->bp);
return(bp); /* return the newly acquired block */
}
@ -324,8 +388,10 @@ PUBLIC void invalidate(
register struct buf *bp;
for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
if (bp->b_dev == device) bp->b_dev = NO_DEV;
vm_forgetblocks();
}
/*===========================================================================*
@ -339,11 +405,18 @@ PUBLIC void flushall(
register struct buf *bp;
static struct buf **dirty; /* static so it isn't on stack */
static int dirtylistsize = 0;
int ndirty;
STATICINIT(dirty, NR_BUFS);
if(dirtylistsize != nr_bufs) {
if(dirtylistsize > 0)
free(dirty);
if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs)))
panic("couldn't allocate dirty buf list");
dirtylistsize = nr_bufs;
}
for (bp = &buf[0], ndirty = 0; bp < &buf[NR_BUFS]; bp++)
for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++)
if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp;
rw_scattered(dev, dirty, ndirty, WRITING);
}
@ -414,6 +487,7 @@ PUBLIC void rw_scattered(
(dev>>MAJOR)&BYTE, (dev>>MINOR)&BYTE,
bp->b_blocknr);
bp->b_dev = NO_DEV; /* invalidate block */
vm_forgetblocks();
}
break;
}
@ -479,7 +553,7 @@ PUBLIC void set_blocksize(int blocksize)
ASSERT(blocksize > 0);
for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
if(bp->b_count != 0)
panic("change blocksize with buffer in use");
@ -487,25 +561,49 @@ PUBLIC void set_blocksize(int blocksize)
if (rip->i_count > 0)
panic("change blocksize with inode in use");
fs_sync();
buf_pool();
buf_pool(nr_bufs);
fs_block_size = blocksize;
}
/*===========================================================================*
* buf_pool *
*===========================================================================*/
PUBLIC void buf_pool(void)
PUBLIC void buf_pool(int new_nr_bufs)
{
/* Initialize the buffer pool. */
register struct buf *bp;
assert(new_nr_bufs > 0);
if(nr_bufs > 0) {
assert(buf);
fs_sync();
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
if(bp->bp) {
assert(bp->b_bytes > 0);
free_contig(bp->bp, bp->b_bytes);
}
}
}
if(buf)
free(buf);
if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs)))
panic("couldn't allocate buf list (%d)", new_nr_bufs);
if(buf_hash)
free(buf_hash);
if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs)))
panic("couldn't allocate buf hash list (%d)", new_nr_bufs);
nr_bufs = new_nr_bufs;
bufs_in_use = 0;
front = &buf[0];
rear = &buf[NR_BUFS - 1];
rear = &buf[nr_bufs - 1];
for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) {
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
bp->b_blocknr = NO_BLOCK;
bp->b_dev = NO_DEV;
bp->b_next = bp + 1;
@ -514,10 +612,11 @@ PUBLIC void buf_pool(void)
bp->b_bytes = 0;
}
buf[0].b_prev = NIL_BUF;
buf[NR_BUFS - 1].b_next = NIL_BUF;
buf[nr_bufs - 1].b_next = NIL_BUF;
for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) bp->b_hash = bp->b_next;
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->b_hash = bp->b_next;
buf_hash[0] = front;
vm_forgetblocks();
}

View file

@ -54,6 +54,7 @@ EXTERN int exitsignaled;
EXTERN int fs_block_size;
/* Buffer cache. */
EXTERN struct buf buf[NR_BUFS];
EXTERN struct buf *buf_hash[NR_BUFS]; /* the buffer hash table */
EXTERN struct buf *buf;
EXTERN struct buf **buf_hash; /* the buffer hash table */
EXTERN int nr_bufs;
EXTERN int may_use_vmcache;

View file

@ -98,6 +98,8 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
/* Initialize the Minix file server. */
int i, r;
may_use_vmcache = 1;
/* Init inode table */
for (i = 0; i < NR_INODES; ++i) {
inode[i].i_count = 0;
@ -111,7 +113,7 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
driver_endpoints[i].driver_e = NONE;
SELF_E = getprocnr();
buf_pool();
buf_pool(DEFAULT_NR_BUFS);
fs_block_size = _MIN_BLOCK_SIZE;
fs_m_in.m_type = FS_READY;

View file

@ -1,6 +1,8 @@
#include "fs.h"
#include <fcntl.h>
#include <assert.h>
#include <minix/vfsif.h>
#include <minix/fsctl.h>
#include "buf.h"
#include "inode.h"
@ -18,12 +20,15 @@ PUBLIC int fs_sync()
struct inode *rip;
struct buf *bp;
assert(nr_bufs > 0);
assert(buf);
/* Write all the dirty inodes to the disk. */
for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
if(rip->i_count > 0 && rip->i_dirt == DIRTY) rw_inode(rip, WRITING);
/* Write all the dirty blocks to the disk, one drive at a time. */
for(bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
if(bp->b_dev != NO_DEV && bp->b_dirt == DIRTY)
flushall(bp->b_dev);

View file

@ -9,7 +9,7 @@ struct super_block;
/* cache.c */
_PROTOTYPE( zone_t alloc_zone, (dev_t dev, zone_t z) );
_PROTOTYPE( void buf_pool, (void) );
_PROTOTYPE( void buf_pool, (int bufs) );
_PROTOTYPE( void flushall, (dev_t dev) );
_PROTOTYPE( void free_zone, (dev_t dev, zone_t numb) );
_PROTOTYPE( struct buf *get_block, (dev_t dev, block_t block,int only_search));

View file

@ -3,6 +3,7 @@
#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
@ -425,16 +426,23 @@ unsigned bytes_ahead; /* bytes beyond position for immediate use */
*/
int block_size;
/* Minimum number of blocks to prefetch. */
# define BLOCKS_MINIMUM (NR_BUFS < 50 ? 18 : 32)
# define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
int block_spec, scale, read_q_size;
unsigned int blocks_ahead, fragment;
block_t block, blocks_left;
off_t ind1_pos;
dev_t dev;
struct buf *bp;
static int readqsize = 0;
static struct buf **read_q;
STATICINIT(read_q, NR_BUFS);
if(readqsize != nr_bufs) {
if(readqsize > 0)
free(read_q);
if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
panic("couldn't allocate read_q");
readqsize = nr_bufs;
}
block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
if (block_spec)
@ -510,7 +518,7 @@ unsigned bytes_ahead; /* bytes beyond position for immediate use */
if (--blocks_ahead == 0) break;
/* Don't trash the cache, leave 4 free. */
if (bufs_in_use >= NR_BUFS - 4) break;
if (bufs_in_use >= nr_bufs - 4) break;
block++;

View file

@ -4,7 +4,8 @@
PROG= vm
SRCS= main.c alloc.c utility.c exec.c exit.c fork.c break.c \
signal.c mmap.c slaballoc.c region.c pagefaults.c addravl.c \
physravl.c rs.c queryexit.c
physravl.c rs.c queryexit.c yieldedavl.c
DPADD+= ${LIBSYS}
LDADD+= -lsys

View file

@ -1,8 +1,8 @@
#include "proto.h"
#include "sanitycheck.h"
#include "pagerange.h"
#include "addravl.h"
#include "proto.h"
#include "util.h"
#include "addravl_defs.h"
#include "cavl_if.h"
#include "cavl_impl.h"

View file

@ -2,23 +2,8 @@
#ifndef ADDRAVL
#define ADDRAVL 1
#define AVL_UNIQUE(id) addr_ ## id
#define AVL_HANDLE pagerange_t *
#define AVL_KEY phys_bytes
#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
#define AVL_NULL NULL
#define AVL_GET_LESS(h, a) (h)->less
#define AVL_GET_GREATER(h, a) (h)->greater
#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
#define AVL_SET_ROOT(h, v) (h)->root = v;
#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->addr)
#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->addr, (h2)->addr)
#define AVL_INSIDE_STRUCT char pad[4];
#include "addravl_defs.h"
#include "cavl_if.h"
#include "unavl.h"
#endif

17
servers/vm/addravl_defs.h Normal file
View file

@ -0,0 +1,17 @@
/* Configuration macros for an AVL tree instance keyed on an address.
 * Generated names get the addr_ prefix, nodes are handled through
 * pagerange_t pointers, and ordering is by each node's 'addr' field.
 * NOTE(review): presumably these are interpreted by cavl_if.h and
 * cavl_impl.h when included after this file - confirm against those headers.
 */

/* Naming, node handle type and key type of this tree instance. */
#define AVL_UNIQUE(id) addr_ ## id
#define AVL_HANDLE pagerange_t *
#define AVL_KEY phys_bytes
#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
#define AVL_NULL NULL
/* Child link accessors; the second argument of the getters is unused. */
#define AVL_GET_LESS(h, a) (h)->less
#define AVL_GET_GREATER(h, a) (h)->greater
/* Writes into a node go through the USE() wrapper. */
#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
/* Balance factor accessors. */
#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
/* The root is assigned directly, without the USE() wrapper. */
#define AVL_SET_ROOT(h, v) (h)->root = v;
/* Three-way comparisons yielding 1, -1 or 0; nodes compare by 'addr'. */
#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->addr)
#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->addr, (h2)->addr)
/* Declaration text handed to the template as AVL_INSIDE_STRUCT. */
#define AVL_INSIDE_STRUCT char pad[4];

View file

@ -90,7 +90,6 @@ PRIVATE bitchunk_t pagemap[CHUNKS];
#define CHECKHOLES
#endif
#if SANITYCHECKS
/*===========================================================================*
@ -184,8 +183,11 @@ PUBLIC phys_clicks alloc_mem(phys_clicks clicks, u32_t memflags)
}
if(vm_paged) {
assert(CLICK_SIZE == VM_PAGE_SIZE);
mem = alloc_pages(clicks, memflags, NULL);
if(mem == NO_MEM) {
free_yielded(clicks * CLICK_SIZE);
mem = alloc_pages(clicks, memflags, NULL);
}
} else {
CHECKHOLES;
prev_ptr = NIL_HOLE;
@ -493,17 +495,12 @@ PRIVATE PUBLIC phys_bytes alloc_pages(int pages, int memflags, phys_bytes *len)
}
if(!pr) {
printf("VM: alloc_pages: alloc failed of %d pages\n", pages);
util_stacktrace();
printmemstats();
if(len)
*len = 0;
#if SANITYCHECKS
if(largest >= pages) {
panic("no memory but largest was enough");
}
assert(largest < pages);
#endif
return NO_MEM;
return NO_MEM;
}
SLABSANE(pr);
@ -552,10 +549,6 @@ PRIVATE PUBLIC phys_bytes alloc_pages(int pages, int memflags, phys_bytes *len)
memstats(&finalnodes, &finalpages, &largest);
sanitycheck();
if(finalpages != wantpages) {
printf("pages start: %d req: %d final: %d\n",
firstpages, pages, finalpages);
}
assert(finalnodes == wantnodes);
assert(finalpages == wantpages);
#endif
@ -916,9 +909,21 @@ struct memlist *alloc_mem_in_list(phys_bytes bytes, u32_t flags)
do {
struct memlist *ml;
phys_bytes mem, gotpages;
mem = alloc_pages(rempages, flags, &gotpages);
vir_bytes freed = 0;
do {
mem = alloc_pages(rempages, flags, &gotpages);
if(mem == NO_MEM) {
printf("*");
freed = free_yielded(rempages * VM_PAGE_SIZE);
}
} while(mem == NO_MEM && freed > 0);
if(mem == NO_MEM) {
printf("alloc_mem_in_list: giving up, %dkB missing\n",
rempages * VM_PAGE_SIZE/1024);
printmemstats();
free_mem_list(head, 1);
return NULL;
}

View file

@ -517,5 +517,8 @@ PUBLIC int proc_new(struct vmproc *vmp,
panic("exec_newmem: pt_bind failed: %d", s);
}
/* No yielded memory blocks. */
yielded_init(&vmp->vm_yielded_blocks);
return OK;
}

View file

@ -42,7 +42,6 @@ PUBLIC void clear_proc(struct vmproc *vmp)
vmp->vm_regions = NULL;
vmp->vm_callback = NULL; /* No pending vfs callback. */
vmp->vm_flags = 0; /* Clear INUSE, so slot is free. */
vmp->vm_count = 0;
vmp->vm_heap = NULL;
#if VMSTATS
vmp->vm_bytecopies = 0;

View file

@ -70,6 +70,7 @@ PUBLIC int do_fork(message *msg)
*vmc = *vmp;
vmc->vm_slot = childproc;
vmc->vm_regions = NULL;
yielded_init(&vmc->vm_yielded_blocks);
vmc->vm_endpoint = NONE; /* In case someone tries to use it. */
vmc->vm_pt = origpt;
vmc->vm_flags &= ~VMF_HASPT;

View file

@ -31,4 +31,3 @@ EXTERN int total_pages;
EXTERN long vm_paged;
EXTERN int meminit_done;

View file

@ -76,6 +76,8 @@ PUBLIC int main(void)
message msg;
int result, who_e, rcv_sts;
sigset_t sigset;
int caller_slot;
struct vmproc *vmp_caller;
/* SEF local startup. */
sef_local_startup();
@ -90,19 +92,19 @@ PUBLIC int main(void)
if(missing_spares > 0) {
pt_cycle(); /* pagetable code wants to be called */
}
SANITYCHECK(SCL_DETAIL);
if ((r=sef_receive_status(ANY, &msg, &rcv_sts)) != OK)
panic("sef_receive_status() error: %d", r);
SANITYCHECK(SCL_DETAIL);
if (is_ipc_notify(rcv_sts)) {
/* Unexpected notify(). */
printf("VM: ignoring notify() from %d\n", msg.m_source);
continue;
}
who_e = msg.m_source;
if(vm_isokendpt(who_e, &caller_slot) != OK)
panic("invalid caller", who_e);
vmp_caller = &vmproc[caller_slot];
c = CALLNUMBER(msg.m_type);
result = ENOSYS; /* Out of range or restricted calls return this. */
if (msg.m_type == VM_PAGEFAULT) {
@ -118,8 +120,7 @@ PUBLIC int main(void)
*/
continue;
} else if(c < 0 || !vm_calls[c].vmc_func) {
printf("VM: out of range or missing callnr %d from %d\n",
msg.m_type, who_e);
/* out of range or missing callnr */
} else {
if (vm_acl_ok(who_e, c) != OK) {
printf("VM: unauthorized %s by %d\n",
@ -135,16 +136,13 @@ PUBLIC int main(void)
* which is a pseudo-result suppressing the reply message.
*/
if(result != SUSPEND) {
SANITYCHECK(SCL_DETAIL);
msg.m_type = result;
if((r=send(who_e, &msg)) != OK) {
printf("VM: couldn't send %d to %d (err %d)\n",
msg.m_type, who_e, r);
panic("send() error");
}
SANITYCHECK(SCL_DETAIL);
}
SANITYCHECK(SCL_DETAIL);
}
return(OK);
}
@ -356,6 +354,9 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
CALLMAP(VM_GETREF, do_get_refcount);
CALLMAP(VM_INFO, do_info);
CALLMAP(VM_QUERY_EXIT, do_query_exit);
CALLMAP(VM_FORGETBLOCKS, do_forgetblocks);
CALLMAP(VM_FORGETBLOCK, do_forgetblock);
CALLMAP(VM_YIELDBLOCKGETBLOCK, do_yieldblockgetblock);
/* Sanity checks */
if(find_kernel_top() >= VM_PROCSTART)
@ -396,6 +397,14 @@ PRIVATE void sef_cb_signal_handler(int signo)
do_memory();
break;
}
/* It can happen that we get stuck receiving signals
* without sef_receive() returning. We could need more memory
* though.
*/
if(missing_spares > 0) {
pt_cycle(); /* pagetable code wants to be called */
}
}
/*===========================================================================*

22
servers/vm/phys_region.h Normal file
View file

@ -0,0 +1,22 @@
/* Definition of struct phys_region (phys_region_t): a reference to a
 * phys_block, positioned at an offset within a parent vir_region, that is
 * also a node of an AVL tree and a member of the list of all phys_regions
 * sharing the same phys_block.
 */
#ifndef PHYS_REGION_H
#define PHYS_REGION_H 1
#include <stddef.h>
typedef struct phys_region {
struct phys_block *ph; /* the phys_block this region refers to */
struct vir_region *parent; /* vir_region or NULL if yielded */
vir_bytes offset; /* offset from start of vir region */
#if SANITYCHECKS
int written; /* written to pagetable */
#endif
/* list of phys_regions that reference the same phys_block */
struct phys_region *next_ph_list;
/* AVL fields */
struct phys_region *less, *greater;
int factor;
} phys_region_t;
#endif

View file

@ -1,8 +1,8 @@
#include "sanitycheck.h"
#include "region.h"
#include "physravl.h"
#include "util.h"
#include "proto.h"
#include "sanitycheck.h"
#include "phys_region.h"
#include "physravl_defs.h"
#include "cavl_if.h"
#include "cavl_impl.h"

View file

@ -2,23 +2,9 @@
#ifndef _PHYSRAVL_H
#define _PHYSRAVL_H
#define AVL_UNIQUE(id) physr_ ## id
#define AVL_HANDLE phys_region_t *
#define AVL_KEY phys_bytes
#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
#define AVL_NULL NULL
#define AVL_GET_LESS(h, a) (h)->less
#define AVL_GET_GREATER(h, a) (h)->greater
#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
#define AVL_SET_ROOT(h, v) USE((h), (h)->root = v;);
#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->offset)
#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->offset, (h2)->offset)
#define AVL_INSIDE_STRUCT char pad[4];
#include "phys_region.h"
#include "physravl_defs.h"
#include "cavl_if.h"
#include "unavl.h"
#endif

View file

@ -0,0 +1,18 @@
/* Configuration macros for an AVL tree instance of phys_region_t nodes.
 * Generated names get the physr_ prefix and ordering is by each node's
 * 'offset' field, with vir_bytes as the key type.
 * NOTE(review): presumably these are interpreted by cavl_if.h and
 * cavl_impl.h when included after this file - confirm against those headers.
 */
#include <minix/u64.h>
/* Naming, node handle type and key type of this tree instance. */
#define AVL_UNIQUE(id) physr_ ## id
#define AVL_HANDLE phys_region_t *
#define AVL_KEY vir_bytes
#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
#define AVL_NULL NULL
/* Child link accessors; the second argument of the getters is unused. */
#define AVL_GET_LESS(h, a) (h)->less
#define AVL_GET_GREATER(h, a) (h)->greater
/* Writes into a node or root go through the USE() wrapper. */
#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
/* Balance factor accessors. */
#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
#define AVL_SET_ROOT(h, v) USE((h), (h)->root = v;);
/* Three-way comparisons yielding 1, -1 or 0; nodes compare by 'offset'. */
#define AVL_COMPARE_KEY_KEY(k1, k2) ((k1) > (k2) ? 1 : ((k1) < (k2) ? -1 : 0))
#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->offset)
#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->offset, (h2)->offset)
/* Declaration text handed to the template as AVL_INSIDE_STRUCT. */
#define AVL_INSIDE_STRUCT char pad[4];

View file

@ -14,7 +14,7 @@ struct phys_region;
#include <timers.h>
#include <stdio.h>
#include <pagetable.h>
#include "vmproc.h"
#include "vm.h"
/* alloc.c */
@ -168,7 +168,12 @@ _PROTOTYPE(int copy_abs2region, (phys_bytes abs,
struct vir_region *destregion, phys_bytes offset, phys_bytes len));
#if SANITYCHECKS
_PROTOTYPE(void map_sanitycheck,(char *file, int line));
_PROTOTYPE(void blockstats,(void));
#endif
_PROTOTYPE(int do_forgetblocks, (message *m));
_PROTOTYPE(int do_forgetblock, (message *m));
_PROTOTYPE(int do_yieldblockgetblock, (message *m));
_PROTOTYPE(vir_bytes free_yielded, (vir_bytes bytes));
/* $(ARCH)/vm.c */
_PROTOTYPE( vir_bytes arch_map2vir, (struct vmproc *vmp, vir_bytes addr));

View file

@ -29,6 +29,9 @@
#include "physravl.h"
#include "memlist.h"
/* LRU list. */
PRIVATE yielded_t *lru_youngest = NULL, *lru_oldest = NULL;
/* Should a physblock be mapped writable? */
#define WRITABLE(r, pb) \
(((r)->flags & (VR_DIRECT | VR_SHARED)) || \
@ -41,11 +44,17 @@ FORWARD _PROTOTYPE(int map_new_physblock, (struct vmproc *vmp,
FORWARD _PROTOTYPE(int map_ph_writept, (struct vmproc *vmp, struct vir_region *vr,
struct phys_region *pr));
FORWARD _PROTOTYPE(phys_bytes freeyieldednode, (yielded_t *node, int freemem));
FORWARD _PROTOTYPE(struct vir_region *map_copy_region, (struct vmproc *vmp, struct vir_region *vr));
FORWARD _PROTOTYPE(struct phys_region *map_clone_ph_block, (struct vmproc *vmp,
struct vir_region *region, struct phys_region *ph, physr_iter *iter));
#if SANITYCHECKS
FORWARD _PROTOTYPE(void lrucheck, (void));
#endif
PRIVATE char *map_name(struct vir_region *vr)
{
static char name[100];
@ -119,8 +128,6 @@ struct vmproc *vmp;
#if SANITYCHECKS
/*===========================================================================*
* map_sanitycheck_pt *
*===========================================================================*/
@ -158,6 +165,8 @@ PUBLIC void map_sanitycheck(char *file, int line)
{
struct vmproc *vmp;
lrucheck();
/* Macro for looping over all physical blocks of all regions of
* all processes.
*/
@ -230,6 +239,63 @@ PUBLIC void map_sanitycheck(char *file, int line)
MYASSERT(!(pr->ph->length % VM_PAGE_SIZE)););
ALLREGIONS(,MYASSERT(map_sanitycheck_pt(vmp, vr, pr) == OK));
}
#define LRUCHECK lrucheck()
PRIVATE void lrucheck(void)
{
/* Consistency check of the doubly linked LRU list of yielded blocks.
 * Walks from lru_youngest along the 'older' links and asserts that the
 * 'younger' and 'older' links of neighbours mirror each other, and that
 * only the list ends lack a younger or older neighbour.
 */
	yielded_t *list;

	/* Both ends NULL means an empty list, which is fine. */
	if(!lru_youngest && !lru_oldest)
		return;

	/* Otherwise both ends must be valid nodes... */
	SLABSANE(lru_youngest);
	SLABSANE(lru_oldest);

	/* ...with nothing beyond them. */
	assert(!lru_youngest->younger);
	assert(!lru_oldest->older);

	list = lru_youngest;
	while(list) {
		SLABSANE(list);

		/* Only the youngest node may lack a younger neighbour. */
		if(!list->younger) {
			assert(list == lru_youngest);
		} else {
			SLABSANE(list->younger);
			assert(list->younger->older == list);
		}

		/* Only the oldest node may lack an older neighbour. */
		if(!list->older) {
			assert(list == lru_oldest);
		} else {
			SLABSANE(list->older);
			assert(list->older->younger == list);
		}

		list = list->older;
	}
}
void blockstats(void)
{
/* Print how many yielded blocks are on the LRU list and how much memory
 * they hold in total (only when there is at least one block), followed
 * by the output of printmemstats().
 */
	yielded_t *list;
	int blocks = 0;
	phys_bytes mem = 0;
	clock_t ticks;
	int s;

	/* The uptime value itself is not used; only the call's success is
	 * asserted.
	 */
	s = getuptime(&ticks);

	assert(s == OK);

	LRUCHECK;

	/* Sum the block count and block lengths over the whole LRU list. */
	for(list = lru_youngest; list; list = list->older) {
		mem += list->len;
		blocks++;
	}

	/* 'mem' is a phys_bytes, not an int: convert explicitly and use a
	 * matching conversion specifier instead of passing it to %d.
	 */
	if(blocks > 0)
		printf("%d blocks, %lukB; ", blocks,
			(unsigned long) (mem/1024));

	printmemstats();
}
#else
#define LRUCHECK
#endif
@ -612,6 +678,107 @@ PRIVATE int map_free(struct vmproc *vmp, struct vir_region *region)
return OK;
}
/*===========================================================================*
* free_yielded_proc *
*===========================================================================*/
PRIVATE vir_bytes free_yielded_proc(struct vmproc *vmp)
{
/* Release every yielded block in the yielded-block tree of 'vmp',
 * including the memory behind each block, and re-initialize the tree.
 * Returns the sum of the lengths reported by freeyieldednode().
 */
	yielded_t *yb;
	vir_bytes total = 0;

	SANITYCHECK(SCL_FUNCTIONS);

	/* Free associated regions. Each freeyieldednode() call removes the
	 * node from its owner's tree, so the search eventually comes up empty.
	 */
	while((yb = yielded_search_least(&vmp->vm_yielded_blocks))) {
		SLABSANE(yb);
		total += freeyieldednode(yb, 1);
	}

	yielded_init(&vmp->vm_yielded_blocks);

	return total;
}
/* Dispose of one yielded block: unlink 'node' from the global LRU list,
 * remove it from the yielded-block tree of its owner process, free the
 * memory it describes if 'freemem' is nonzero, and finally free the node
 * itself. Returns the length (node->len) of the block that was removed.
 * Panics if the owner recorded in the node is not accepted by
 * vm_isokendpt().
 */
PRIVATE phys_bytes freeyieldednode(yielded_t *node, int freemem)
{
yielded_t *older, *younger, *removed;
int p;
vir_bytes len;
SLABSANE(node);
LRUCHECK;
/* Update LRU: make the node's neighbours point past it, or move the
 * matching list end when the node has no neighbour on that side.
 */
younger = node->younger;
older = node->older;
if(younger) {
SLABSANE(younger);
assert(younger->older == node);
USE(younger, younger->older = node->older;);
} else {
/* No younger neighbour: node was the youngest end of the list. */
assert(node == lru_youngest);
lru_youngest = node->older;
}
if(older) {
SLABSANE(older);
assert(older->younger == node);
USE(older, older->younger = node->younger;);
} else {
/* No older neighbour: node was the oldest end of the list. */
assert(node == lru_oldest);
lru_oldest = node->younger;
}
LRUCHECK;
/* Update AVL: look up the owner's process slot and remove the node,
 * by id, from that process's tree of yielded blocks.
 */
if(vm_isokendpt(node->owner, &p) != OK)
panic("out of date owner of yielded block %d", node->owner);
removed = yielded_remove(&vmproc[p].vm_yielded_blocks, node->id);
assert(removed == node);
/* Free associated memory if requested. */
if(freemem) {
free_mem(ABS2CLICK(node->addr), ABS2CLICK(node->len));
}
/* Free node. The length is saved first because it is returned after
 * the node has been freed.
 */
len = node->len;
SLABFREE(node);
return len;
}
/*========================================================================*
* free_yielded *
*========================================================================*/
PUBLIC vir_bytes free_yielded(vir_bytes max_bytes)
{
/* Reclaim yielded blocks, oldest first, until at least 'max_bytes' bytes
 * have been released or the LRU list is empty. Whole blocks are freed, so
 * the returned number of bytes released may be smaller or larger than
 * 'max_bytes'; it is 0 when nothing could be freed.
 */
	vir_bytes freed = 0;

	while(freed < max_bytes && lru_oldest) {
		SLABSANE(lru_oldest);
		/* freeyieldednode() unlinks the node, which updates lru_oldest. */
		freed += freeyieldednode(lru_oldest, 1);
	}

	return freed;
}
/*========================================================================*
* map_free_proc *
*========================================================================*/
@ -620,8 +787,6 @@ struct vmproc *vmp;
{
struct vir_region *r, *nextr;
SANITYCHECK(SCL_FUNCTIONS);
for(r = vmp->vm_regions; r; r = nextr) {
nextr = r->next;
SANITYCHECK(SCL_DETAIL);
@ -635,9 +800,11 @@ struct vmproc *vmp;
#endif
SANITYCHECK(SCL_DETAIL);
}
vmp->vm_regions = NULL;
/* Free associated yielded blocks. */
free_yielded_proc(vmp);
SANITYCHECK(SCL_FUNCTIONS);
return OK;
@ -760,6 +927,7 @@ int written;
newphysr->parent = region;
/* No other references to this block. */
newphysr->next_ph_list = NULL;);
#if SANITYCHECKS
USE(newphysr, newphysr->written = written;);
#endif
@ -783,7 +951,6 @@ int written;
/* Things did not go well. Undo everything. */
for(ml = memlist; ml; ml = ml->next) {
struct phys_region *physr;
offset += ml->length;
if((physr = physr_search(region->phys, offset,
AVL_EQUAL))) {
assert(physr->ph->refcount == 1);
@ -791,6 +958,7 @@ int written;
physr_remove(region->phys, physr->offset);
SLABFREE(physr);
}
offset += ml->length;
}
} else assert(mapped == length);
@ -816,7 +984,6 @@ struct phys_region *ph;
physr_iter *iter;
{
vir_bytes offset, length;
struct memlist *ml;
u32_t allocflags;
phys_bytes physaddr;
struct phys_region *newpr;
@ -834,6 +1001,9 @@ physr_iter *iter;
* the vir_region could be invalid on unsuccessful return too.)
*/
/* This is only to be done if there is more than one copy. */
assert(ph->ph->refcount > 1);
/* This function takes a physical block, copies its contents
* into newly allocated memory, and replaces the single physical
* block by one or more physical blocks with refcount 1 with the
@ -850,7 +1020,6 @@ physr_iter *iter;
* it with new ones.
*/
SANITYCHECK(SCL_DETAIL);
SLABSANE(ph);
SLABSANE(ph->ph);
assert(ph->ph->refcount > 1);
@ -869,7 +1038,6 @@ physr_iter *iter;
if(map_new_physblock(vmp, region, offset, length,
MAP_NONE, allocflags, written) != OK) {
/* XXX original range now gone. */
free_mem_list(ml, 0);
printf("VM: map_clone_ph_block: map_new_physblock failed.\n");
return NULL;
}
@ -983,9 +1151,9 @@ int write;
struct phys_region *r1 = (er1), *r2 = (er2); \
vir_bytes start = offset, end = offset + length; \
if(r1) { \
start = MAX(start, r1->offset + r1->ph->length); } \
start = MAX(start, r1->offset + r1->ph->length); } \
if(r2) { \
end = MIN(end, r2->offset); } \
end = MIN(end, r2->offset); } \
if(start < end) { \
int r; \
SANITYCHECK(SCL_DETAIL); \
@ -1202,7 +1370,7 @@ PUBLIC int copy_abs2region(phys_bytes abs, struct vir_region *destregion,
sublen = ph->ph->length - suboffset;
assert(suboffset + sublen <= ph->ph->length);
if(ph->ph->refcount != 1) {
printf("VM: copy_abs2region: no phys region found (3).\n");
printf("VM: copy_abs2region: refcount not 1.\n");
return EFAULT;
}
@ -1731,99 +1899,94 @@ PUBLIC void printregionstats(struct vmproc *vmp)
}
/*===========================================================================*
* do_map_memory *
* do_map_memory *
*===========================================================================*/
PRIVATE int do_map_memory(struct vmproc *vms, struct vmproc *vmd,
struct vir_region *vrs, struct vir_region *vrd,
vir_bytes offset_s, vir_bytes offset_d,
vir_bytes length, int flag)
struct vir_region *vrs, struct vir_region *vrd,
vir_bytes offset_s, vir_bytes offset_d,
vir_bytes length, int flag)
{
struct phys_region *prs;
struct phys_region *newphysr;
struct phys_block *pb;
physr_iter iter;
u32_t pt_flag = PTF_PRESENT | PTF_USER;
vir_bytes end;
struct phys_region *prs;
struct phys_region *newphysr;
struct phys_block *pb;
physr_iter iter;
u32_t pt_flag = PTF_PRESENT | PTF_USER;
vir_bytes end;
SANITYCHECK(SCL_FUNCTIONS);
/* Search for the first phys region in the source process. */
physr_start_iter(vrs->phys, &iter, offset_s, AVL_EQUAL);
prs = physr_get_iter(&iter);
if(!prs)
panic("do_map_memory: no aligned phys region: %d", 0);
/* Search for the first phys region in the source process. */
physr_start_iter(vrs->phys, &iter, offset_s, AVL_EQUAL);
prs = physr_get_iter(&iter);
if(!prs)
panic("do_map_memory: no aligned phys region: %d", 0);
/* flag: 0 -> read-only
* 1 -> writable
* -1 -> share as COW, so read-only
*/
if(flag > 0)
pt_flag |= PTF_WRITE;
/* flag: 0 -> read-only
* 1 -> writable
* -1 -> share as COW, so read-only
*/
if(flag > 0)
pt_flag |= PTF_WRITE;
/* Map phys blocks in the source process to the destination process. */
end = offset_d + length;
while((prs = physr_get_iter(&iter)) && offset_d < end) {
/* If a SMAP share was requested but the phys block has already
* been shared as COW, copy the block for the source phys region
* first.
*/
pb = prs->ph;
if(flag >= 0 && pb->refcount > 1
&& pb->share_flag == PBSH_COW) {
/* Map phys blocks in the source process to the destination process. */
end = offset_d + length;
while((prs = physr_get_iter(&iter)) && offset_d < end) {
/* If a SMAP share was requested but the phys block has already
* been shared as COW, copy the block for the source phys region
* first.
*/
pb = prs->ph;
if(flag >= 0 && pb->refcount > 1
&& pb->share_flag == PBSH_COW) {
if(!(prs = map_clone_ph_block(vms, vrs, prs, &iter)))
return ENOMEM;
pb = prs->ph;
}
pb = prs->ph;
}
/* Allocate a new phys region. */
if(!SLABALLOC(newphysr))
return ENOMEM;
/* Allocate a new phys region. */
if(!SLABALLOC(newphysr))
return ENOMEM;
/* Set and link the new phys region to the block. */
newphysr->ph = pb;
newphysr->offset = offset_d;
newphysr->parent = vrd;
newphysr->next_ph_list = pb->firstregion;
pb->firstregion = newphysr;
physr_insert(newphysr->parent->phys, newphysr);
pb->refcount++;
/* Set and link the new phys region to the block. */
newphysr->ph = pb;
newphysr->offset = offset_d;
newphysr->parent = vrd;
newphysr->next_ph_list = pb->firstregion;
pb->firstregion = newphysr;
physr_insert(newphysr->parent->phys, newphysr);
pb->refcount++;
/* If a COW share was requested but the phys block has already
* been shared as SMAP, give up on COW and copy the block for
* the destination phys region now.
*/
if(flag < 0 && pb->refcount > 1
&& pb->share_flag == PBSH_SMAP) {
/* If a COW share was requested but the phys block has already
* been shared as SMAP, give up on COW and copy the block for
* the destination phys region now.
*/
if(flag < 0 && pb->refcount > 1
&& pb->share_flag == PBSH_SMAP) {
if(!(newphysr = map_clone_ph_block(vmd, vrd,
newphysr, NULL))) {
return ENOMEM;
}
}
else {
/* See if this is a COW share or SMAP share. */
if(flag < 0) { /* COW share */
pb->share_flag = PBSH_COW;
/* Update the page table for the src process. */
pt_writemap(&vms->vm_pt, offset_s + vrs->vaddr,
pb->phys, pb->length,
pt_flag, WMF_OVERWRITE);
}
else { /* SMAP share */
pb->share_flag = PBSH_SMAP;
}
/* Update the page table for the destination process. */
pt_writemap(&vmd->vm_pt, offset_d + vrd->vaddr,
pb->phys, pb->length, pt_flag, WMF_OVERWRITE);
}
}
else {
/* See if this is a COW share or SMAP share. */
if(flag < 0) { /* COW share */
pb->share_flag = PBSH_COW;
/* Update the page table for the src process. */
pt_writemap(&vms->vm_pt, offset_s + vrs->vaddr,
pb->phys, pb->length,
pt_flag, WMF_OVERWRITE);
}
else { /* SMAP share */
pb->share_flag = PBSH_SMAP;
}
/* Update the page table for the destination process. */
pt_writemap(&vmd->vm_pt, offset_d + vrd->vaddr,
pb->phys, pb->length, pt_flag, WMF_OVERWRITE);
}
physr_incr_iter(&iter);
offset_d += pb->length;
offset_s += pb->length;
}
SANITYCHECK(SCL_FUNCTIONS);
return OK;
physr_incr_iter(&iter);
offset_d += pb->length;
offset_s += pb->length;
}
return OK;
}
/*===========================================================================*
@ -1872,58 +2035,59 @@ PUBLIC int unmap_memory(endpoint_t sour, endpoint_t dest,
return OK;
}
/*===========================================================================*
* split_phys *
* split_phys *
*===========================================================================*/
PRIVATE int split_phys(struct phys_region *pr, vir_bytes point)
{
struct phys_region *newpr, *q, *prev;
struct phys_block *newpb;
struct phys_block *pb = pr->ph;
struct phys_region *newpr, *q, *prev;
struct phys_block *newpb;
struct phys_block *pb = pr->ph;
/* Split the phys region into 2 parts by @point. */
if(pr->offset >= point || pr->offset + pb->length <= point)
return OK;
if(!SLABALLOC(newpb))
return ENOMEM;
if(pr->offset >= point || pr->offset + pb->length <= point)
return OK;
if(!SLABALLOC(newpb))
return ENOMEM;
/* Split phys block. */
*newpb = *pb;
pb->length = point - pr->offset;
newpb->length -= pb->length;
newpb->phys += pb->length;
/* Split phys block. */
*newpb = *pb;
pb->length = point - pr->offset;
newpb->length -= pb->length;
newpb->phys += pb->length;
/* Split phys regions in a list. */
for(q = pb->firstregion; q; q = q->next_ph_list) {
if(!SLABALLOC(newpr))
return ENOMEM;
/* Split phys regions in a list. */
for(q = pb->firstregion; q; q = q->next_ph_list) {
if(!SLABALLOC(newpr))
return ENOMEM;
*newpr = *q;
newpr->ph = newpb;
newpr->offset += pb->length;
*newpr = *q;
newpr->ph = newpb;
newpr->offset += pb->length;
/* Link to the vir region's phys region list. */
physr_insert(newpr->parent->phys, newpr);
/* Link to the vir region's phys region list. */
physr_insert(newpr->parent->phys, newpr);
/* Link to the next_ph_list. */
if(q == pb->firstregion) {
newpb->firstregion = newpr;
prev = newpr;
} else {
prev->next_ph_list = newpr;
prev = newpr;
}
}
prev->next_ph_list = NULL;
/* Link to the next_ph_list. */
if(q == pb->firstregion) {
newpb->firstregion = newpr;
prev = newpr;
} else {
prev->next_ph_list = newpr;
prev = newpr;
}
}
prev->next_ph_list = NULL;
return OK;
return OK;
}
/*===========================================================================*
* clean_phys_regions *
* clean_phys_regions *
*===========================================================================*/
PRIVATE void clean_phys_regions(struct vir_region *region,
vir_bytes offset, vir_bytes length)
vir_bytes offset, vir_bytes length)
{
/* Consider @offset as the start address and @offset+length as the end address.
* If there are phys regions crossing the start address or the end address,
@ -1931,51 +2095,51 @@ PRIVATE void clean_phys_regions(struct vir_region *region,
*
* We assume that the phys regions are listed in order and don't overlap.
*/
struct phys_region *pr;
physr_iter iter;
struct phys_region *pr;
physr_iter iter;
physr_start_iter_least(region->phys, &iter);
while((pr = physr_get_iter(&iter))) {
/* If this phys region crosses the start address, split it. */
if(pr->offset < offset
&& pr->offset + pr->ph->length > offset) {
split_phys(pr, offset);
physr_start_iter_least(region->phys, &iter);
}
/* If this phys region crosses the end address, split it. */
else if(pr->offset < offset + length
&& pr->offset + pr->ph->length > offset + length) {
split_phys(pr, offset + length);
physr_start_iter_least(region->phys, &iter);
}
else {
physr_incr_iter(&iter);
}
}
physr_start_iter_least(region->phys, &iter);
while((pr = physr_get_iter(&iter))) {
/* If this phys region crosses the start address, split it. */
if(pr->offset < offset
&& pr->offset + pr->ph->length > offset) {
split_phys(pr, offset);
physr_start_iter_least(region->phys, &iter);
}
/* If this phys region crosses the end address, split it. */
else if(pr->offset < offset + length
&& pr->offset + pr->ph->length > offset + length) {
split_phys(pr, offset + length);
physr_start_iter_least(region->phys, &iter);
}
else {
physr_incr_iter(&iter);
}
}
}
/*===========================================================================*
* rm_phys_regions *
* rm_phys_regions *
*===========================================================================*/
PRIVATE void rm_phys_regions(struct vir_region *region,
vir_bytes begin, vir_bytes length)
vir_bytes begin, vir_bytes length)
{
/* Remove all phys regions between @begin and @begin+length.
*
* Don't update the page table, because we will update it at map_memory()
* later.
*/
struct phys_region *pr;
physr_iter iter;
struct phys_region *pr;
physr_iter iter;
physr_start_iter(region->phys, &iter, begin, AVL_GREATER_EQUAL);
while((pr = physr_get_iter(&iter)) && pr->offset < begin + length) {
pb_unreferenced(region, pr);
physr_remove(region->phys, pr->offset);
physr_start_iter(region->phys, &iter, begin,
AVL_GREATER_EQUAL);
SLABFREE(pr);
}
physr_start_iter(region->phys, &iter, begin, AVL_GREATER_EQUAL);
while((pr = physr_get_iter(&iter)) && pr->offset < begin + length) {
pb_unreferenced(region, pr);
physr_remove(region->phys, pr->offset);
physr_start_iter(region->phys, &iter, begin,
AVL_GREATER_EQUAL);
SLABFREE(pr);
}
}
/*===========================================================================*
@ -2056,6 +2220,295 @@ map_lookup_phys(struct vmproc *vmp, u32_t tag)
return pr->ph->phys;
}
/*===========================================================================*
* get_clean_phys_region *
*===========================================================================*/
/* Find the phys_region of process 'vmp' that starts exactly at virtual
 * address 'vaddr' and whose phys_block is exactly 'length' bytes long, and
 * make sure that phys_block is referenced only once (cloning it if it is
 * currently referenced more than once).
 *
 * On success the phys_region is returned and *ret_region is set to the
 * vir_region that contains it. NULL is returned (and *ret_region is left
 * untouched) if the address is not in a region, the region is not VR_ANON,
 * no phys_region starts exactly there, the length differs, or the clone
 * fails.
 */
PRIVATE struct phys_region *
get_clean_phys_region(struct vmproc *vmp, vir_bytes vaddr, vir_bytes length,
	struct vir_region **ret_region)
{
	struct vir_region *region;
	vir_bytes regionoffset, mapaddr;
	struct phys_region *ph;

	mapaddr = arch_vir2map(vmp, vaddr);

	if(!(region = map_lookup(vmp, mapaddr))) {
		printf("VM: get_clean_phys_region: 0x%lx not found\n",
			(unsigned long) vaddr);
		return NULL;
	}

	if(!(region->flags & VR_ANON)) {
		printf("VM: get_clean_phys_region: non-anon 0x%lx\n",
			(unsigned long) vaddr);
		return NULL;
	}

	assert(mapaddr >= region->vaddr);
	assert(mapaddr < region->vaddr + region->length);

	regionoffset = mapaddr-region->vaddr;

	/* For now, only support the yielding of blocks that are
	 * exactly a mapped phys_region. Go get that phys_region.
	 * (This can be improved without changing the interface.)
	 */
	if(!(ph = physr_search(region->phys, regionoffset,
		AVL_EQUAL))) {
		printf("VM: get_clean_phys_region: exact block not found\n");
		return NULL;
	}

	/* Make sure this is what we asked for. */
	assert(ph->offset == regionoffset);

	if(ph->ph->length != length) {
		/* Cast so the arguments match the conversion specifiers
		 * whatever the underlying width of the length types is.
		 */
		printf("VM: get_clean_phys_region: len mismatch (%lu, %lu)\n",
			(unsigned long) ph->ph->length,
			(unsigned long) length);
		return NULL;
	}

	/* If it's mapped more than once, make a copy. */
	assert(ph->ph->refcount > 0);
	if(ph->ph->refcount > 1) {
		if(!(ph = map_clone_ph_block(vmp, region,
			ph, NULL))) {
			printf("VM: get_clean_phys_region: ph copy failed\n");
			return NULL;
		}
	}

	assert(ph->ph->refcount == 1);

	*ret_region = region;

	return ph;
}
/* Give the previously yielded block with identifier 'id' back to process
 * 'vmp', mapping it at virtual address 'vaddr' in place of the memory that
 * is currently there.
 *
 * Returns OK on success, ESRCH if no yielded block with this id exists (or
 * its recorded length differs from 'len'), and EINVAL if 'vaddr'/'len' do
 * not describe a suitable phys_region (see get_clean_phys_region()).
 */
PRIVATE int getblock(struct vmproc *vmp, u64_t id,
	vir_bytes vaddr, vir_bytes len)
{
	yielded_t *yb;
	struct phys_region *ph;
	struct vir_region *region;

	/* Try to get the yielded block */
	if(!(yb = yielded_search(&vmp->vm_yielded_blocks, id, AVL_EQUAL))) {
		return ESRCH;
	}

	/* Check the size as a sanity check. */
	if(yb->len != len) {
		/* Arguments are cast so they match the specifiers. */
		printf("VM: id 0x%lx%08lx mismatched size (%lu, %lu) for %d\n",
			(unsigned long) ex64hi(id), (unsigned long) ex64lo(id),
			(unsigned long) yb->len, (unsigned long) len,
			vmp->vm_endpoint);
		return ESRCH;
	}

	/* Get the intended phys region, make sure refcount is 1. */
	if(!(ph = get_clean_phys_region(vmp, vaddr, len, &region))) {
		printf("VM: getblock: not found for %d\n", vmp->vm_endpoint);
		return EINVAL;
	}

	assert(ph->ph->refcount == 1);

	/* Free the block that is currently there. */
	free_mem(ABS2CLICK(ph->ph->phys), ABS2CLICK(ph->ph->length));

	/* Set the phys block to new addr and update pagetable. */
	USE(ph->ph, ph->ph->phys = yb->addr;);
	if(map_ph_writept(vmp, region, ph) != OK) {
		/* Presumably it was mapped, so there is no reason
		 * updating should fail.
		 */
		panic("getblock: couldn't write pt");
	}

	/* Forget about the yielded block and free the struct. The memory
	 * itself is now mapped in the process, hence the 0 argument
	 * (presumably "do not free the memory" - confirm against
	 * freeyieldednode()).
	 */
	freeyieldednode(yb, 0);

	return OK;
}
/* Take the memory mapped at 'vaddr' ('len' bytes) away from process 'vmp',
 * remember it as a yielded block under identifier 'id', and map freshly
 * allocated memory (requested with PAF_CLEAR) at 'vaddr' instead.
 *
 * The new yielded block is inserted in the per-process AVL tree and becomes
 * the youngest entry of the global LRU list. If 'retyb' is non-NULL, the
 * new yielded block is stored in *retyb.
 *
 * Returns OK on success, EINVAL if 'id' is already in use or the range is
 * not a suitable phys_region, and ENOMEM if bookkeeping or replacement
 * memory could not be allocated.
 */
PRIVATE int yieldblock(struct vmproc *vmp, u64_t id,
	vir_bytes vaddr, vir_bytes len, yielded_t **retyb)
{
	yielded_t *newyb;
	vir_bytes mem_clicks, clicks;
	struct vir_region *region;
	struct phys_region *ph;

	/* Makes no sense if yielded block ID already exists, and
	 * is likely a serious bug in the caller.
	 */
	if(yielded_search(&vmp->vm_yielded_blocks, id, AVL_EQUAL)) {
		printf("VM: yieldblock: id 0x%lx%08lx already yielded by %d\n",
			(unsigned long) ex64hi(id), (unsigned long) ex64lo(id),
			vmp->vm_endpoint);
		return EINVAL;
	}

	if(!(ph = get_clean_phys_region(vmp, vaddr, len, &region))) {
		printf("VM: yieldblock: not found for %d\n",
			vmp->vm_endpoint);
		return EINVAL;
	}

	/* Make a new block to record the yielding in. */
	if(!SLABALLOC(newyb)) {
		return ENOMEM;
	}

	assert(!(ph->ph->phys % VM_PAGE_SIZE));
	assert(!(ph->ph->length % VM_PAGE_SIZE));

	/* Replacement memory of the same size for the caller. */
	clicks = CLICKSPERPAGE * ph->ph->length / VM_PAGE_SIZE;
	if((mem_clicks = alloc_mem(clicks, PAF_CLEAR)) == NO_MEM) {
		SLABFREE(newyb);
		return ENOMEM;
	}

	/* Update yielded block info. */
	USE(newyb,
		newyb->id = id;
		newyb->addr = ph->ph->phys;
		newyb->len = ph->ph->length;
		newyb->owner = vmp->vm_endpoint;
		newyb->younger = NULL;);

	/* Set new phys block to new addr and update pagetable. */
	USE(ph->ph,
		ph->ph->phys = CLICK2ABS(mem_clicks););
	if(map_ph_writept(vmp, region, ph) != OK) {
		/* Presumably it was mapped, so there is no reason
		 * updating should fail.
		 */
		panic("yieldblock: couldn't write pt");
	}

	/* Remember yielded block. */
	yielded_insert(&vmp->vm_yielded_blocks, newyb);

	/* Add to LRU list too. It's the youngest block. */
	LRUCHECK;
	if(lru_youngest) {
		USE(lru_youngest,
			lru_youngest->younger = newyb;);
	} else {
		/* List was empty, so the new block is also the oldest. */
		lru_oldest = newyb;
	}
	USE(newyb,
		newyb->older = lru_youngest;);
	lru_youngest = newyb;
	LRUCHECK;

	if(retyb)
		*retyb = newyb;

	return OK;
}
/*===========================================================================*
* do_forgetblocks *
*===========================================================================*/
/* Handle VM_FORGETBLOCKS: drop every block the calling process has yielded.
 *
 * Returns EFAULT if the caller has no page table (VMF_HASPT not set),
 * OK otherwise. Panics if the message source is not a valid endpoint.
 */
PUBLIC int do_forgetblocks(message *m)
{
	int n;
	struct vmproc *vmp;
	endpoint_t caller = m->m_source;

	if(vm_isokendpt(caller, &n) != OK)
		panic("do_forgetblocks: message from strange source: %d",
			m->m_source);

	vmp = &vmproc[n];

	if(!(vmp->vm_flags & VMF_HASPT)) {
		printf("do_forgetblocks: no pt\n");
		return EFAULT;
	}

	free_yielded_proc(vmp);

	return OK;
}
/*===========================================================================*
* do_forgetblock *
*===========================================================================*/
/* Handle VM_FORGETBLOCK: drop the single yielded block of the calling
 * process whose 64-bit id is given in the VMFB_IDHI/VMFB_IDLO message
 * fields. An unknown id is not an error.
 *
 * Returns EFAULT if the caller has no page table (VMF_HASPT not set),
 * OK otherwise. Panics if the message source is not a valid endpoint.
 */
PUBLIC int do_forgetblock(message *m)
{
	int n;
	struct vmproc *vmp;
	endpoint_t caller = m->m_source;
	yielded_t *yb;
	u64_t id;

	if(vm_isokendpt(caller, &n) != OK)
		panic("do_forgetblock: message from strange source: %d",
			m->m_source);

	vmp = &vmproc[n];

	if(!(vmp->vm_flags & VMF_HASPT)) {
		printf("do_forgetblock: no pt\n");
		return EFAULT;
	}

	id = make64(m->VMFB_IDLO, m->VMFB_IDHI);

	if((yb = yielded_search(&vmp->vm_yielded_blocks, id, AVL_EQUAL))) {
		/* Second argument 1, unlike the 0 passed by getblock();
		 * presumably this also releases the block's memory -
		 * confirm against freeyieldednode().
		 */
		freeyieldednode(yb, 1);
	}

	return OK;
}
/*===========================================================================*
* do_yieldblockgetblock *
*===========================================================================*/
/* Handle VM_YIELDBLOCKGETBLOCK: optionally yield the block at VMYBGB_VADDR
 * under the "yield" id, then optionally get the block with the "get" id
 * back at that same address. An id equal to VM_BLOCKID_NONE disables the
 * corresponding half. The length is rounded up to a multiple of
 * VM_PAGE_SIZE.
 *
 * Return value: EFAULT if the caller has no page table; otherwise the
 * result of getblock() if a get was requested, and ESRCH if it was not.
 * NOTE(review): the result of the yield half is deliberately not reflected
 * in the reply, so the reply only tells whether a block was gotten -
 * confirm this is what callers expect.
 */
PUBLIC int do_yieldblockgetblock(message *m)
{
	u64_t yieldid, getid;
	int n;
	endpoint_t caller = m->m_source;
	struct vmproc *vmp;
	int r = ESRCH;
	size_t len;

	if(vm_isokendpt(caller, &n) != OK)
		panic("do_yieldblockgetblock: message from strange source: %d",
			m->m_source);

	vmp = &vmproc[n];

	if(!(vmp->vm_flags & VMF_HASPT)) {
		printf("do_yieldblockgetblock: no pt\n");
		return EFAULT;
	}

	/* Round the length up to a whole number of pages. */
	len = m->VMYBGB_LEN;

	if((len % VM_PAGE_SIZE)) {
		len += VM_PAGE_SIZE - len % VM_PAGE_SIZE;
	}

	yieldid = make64(m->VMYBGB_YIELDIDLO, m->VMYBGB_YIELDIDHI);
	getid = make64(m->VMYBGB_GETIDLO, m->VMYBGB_GETIDHI);

	if(cmp64(yieldid, VM_BLOCKID_NONE) != 0) {
		/* A block was given to yield. The resulting yielded_t is
		 * not needed here, so no return pointer is passed.
		 */
		(void) yieldblock(vmp, yieldid,
			(vir_bytes) m->VMYBGB_VADDR, len, NULL);
	}

	if(cmp64(getid, VM_BLOCKID_NONE) != 0) {
		/* A block was given to get. */
		r = getblock(vmp, getid, (vir_bytes) m->VMYBGB_VADDR, len);
	}

	return r;
}

View file

@ -16,6 +16,9 @@
#include <minix/syslib.h>
#include <minix/const.h>
#include "phys_region.h"
#include "physravl.h"
struct phys_block {
#if SANITYCHECKS
u32_t seencount;
@ -31,24 +34,6 @@ struct phys_block {
struct phys_region *firstregion;
};
typedef struct phys_region {
struct phys_block *ph;
struct vir_region *parent; /* parent vir_region. */
vir_bytes offset; /* offset from start of vir region */
#if SANITYCHECKS
int written; /* written to pagetable */
#endif
/* list of phys_regions that reference the same phys_block */
struct phys_region *next_ph_list;
/* AVL fields */
struct phys_region *less, *greater;
int factor;
} phys_region_t;
#include "physravl.h"
struct vir_region {
struct vir_region *next; /* next virtual region in this process */
vir_bytes vaddr; /* virtual address, offset from pagetable */
@ -66,11 +51,11 @@ struct vir_region {
#define VR_LOWER16MB 0x008
#define VR_LOWER1MB 0x010
#define VR_CONTIG 0x020 /* Must be physically contiguous. */
#define VR_SHARED 0x040
/* Mapping type: */
#define VR_ANON 0x100 /* Memory to be cleared and allocated */
#define VR_DIRECT 0x200 /* Mapped, but not managed by VM */
#define VR_SHARED 0x40
/* Tag values: */
#define VRT_NONE 0xBEEF0000

View file

@ -4,7 +4,6 @@
#include <assert.h>
#include "vm.h"
#include "glo.h"
#if SANITYCHECKS
@ -35,16 +34,6 @@
incheck = 0; \
}
#include "kernel/proc.h"
#define USE(obj, code) do { \
slabunlock(obj, sizeof(*obj)); \
do { \
code \
} while(0); \
slablock(obj, sizeof(*obj)); \
} while(0)
#define SLABSANE(ptr) { \
if(!slabsane_f(__FILE__, __LINE__, ptr, sizeof(*(ptr)))) { \
printf("VM:%s:%d: SLABSANE(%s)\n", __FILE__, __LINE__, #ptr); \
@ -55,8 +44,19 @@
#else
#define SANITYCHECK
#define SLABSANITYCHECK(l)
#define USE(obj, code) do { code } while(0)
#define SLABSANE(ptr)
#endif
#if MEMPROTECT
#define USE(obj, code) do { \
slabunlock(obj, sizeof(*obj)); \
do { \
code \
} while(0); \
slablock(obj, sizeof(*obj)); \
} while(0)
#else
#define USE(obj, code) do { code } while(0)
#endif
#endif

View file

@ -18,6 +18,7 @@
#include <assert.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <env.h>
@ -40,7 +41,7 @@
#define OFF(f, b) assert(!GETBIT(f, b))
#define ON(f, b) assert(GETBIT(f, b))
#if SANITYCHECKS
#if MEMPROTECT
#define SLABDATAWRITABLE(data, wr) do { \
assert(data->sdh.writable == WRITABLE_NONE); \
assert(wr != WRITABLE_NONE); \
@ -224,8 +225,10 @@ PRIVATE int checklist(char *file, int line,
while(n) {
int count = 0, i;
#if SANITYCHECKS
MYASSERT(n->sdh.magic1 == MAGIC1);
MYASSERT(n->sdh.magic2 == MAGIC2);
#endif
MYASSERT(n->sdh.list == l);
MYASSERT(usedpages_add(n->sdh.phys, VM_PAGE_SIZE) == OK);
if(n->sdh.prev)
@ -342,13 +345,18 @@ PUBLIC void *slaballoc(int bytes)
ret = ((char *) firstused->data) + i*bytes;
#if SANITYCHECKS
#if MEMPROTECT
nojunkwarning++;
slabunlock(ret, bytes);
nojunkwarning--;
assert(!nojunkwarning);
#endif
*(u32_t *) ret = NOJUNK;
#if MEMPROTECT
slablock(ret, bytes);
#endif
#endif
SLABSANITYCHECK(SCL_FUNCTIONS);
SLABDATAUSE(firstused, firstused->sdh.freeguess = i+1;);
@ -400,7 +408,7 @@ PRIVATE int objstats(void *mem, int bytes,
#if SANITYCHECKS
if(*(u32_t *) mem == JUNK && !nojunkwarning) {
util_stacktrace();
printf("VM: WARNING: JUNK seen in slab object\n");
printf("VM: WARNING: JUNK seen in slab object, likely freed\n");
}
#endif
/* Retrieve entry in slabs[]. */
@ -409,8 +417,10 @@ PRIVATE int objstats(void *mem, int bytes,
/* Round address down to VM_PAGE_SIZE boundary to get header. */
f = (struct slabdata *) ((char *) mem - (vir_bytes) mem % VM_PAGE_SIZE);
#if SANITYCHECKS
OBJSTATSCHECK(f->sdh.magic1 == MAGIC1);
OBJSTATSCHECK(f->sdh.magic2 == MAGIC2);
#endif
OBJSTATSCHECK(f->sdh.list == LIST_USED || f->sdh.list == LIST_FULL);
/* Make sure it's in range. */
@ -452,11 +462,17 @@ PUBLIC void slabfree(void *mem, int bytes)
if(*(u32_t *) mem == JUNK) {
printf("VM: WARNING: likely double free, JUNK seen\n");
}
#endif
#if SANITYCHECKS
#if MEMPROTECT
slabunlock(mem, bytes);
#endif
*(u32_t *) mem = JUNK;
nojunkwarning++;
#if MEMPROTECT
slablock(mem, bytes);
#endif
nojunkwarning--;
assert(!nojunkwarning);
#endif
@ -518,7 +534,7 @@ PUBLIC void slabunlock(void *mem, int bytes)
struct slabdata *f;
if(objstats(mem, bytes, &s, &f, &i) != OK)
panic("slablock objstats failed");
panic("slabunlock objstats failed");
SLABDATAWRITABLE(f, i);

15
servers/vm/unavl.h Normal file
View file

@ -0,0 +1,15 @@
/* Undefine the AVL configuration macros. yieldedavl.h includes this file
 * right after cavl_if.h, and vmproc.h includes physravl.h followed by
 * yieldedavl.h, so this presumably exists to let several differently
 * configured AVL interfaces be declared in one translation unit without
 * macro redefinition clashes.
 */
#undef AVL_UNIQUE
#undef AVL_HANDLE
#undef AVL_KEY
#undef AVL_MAX_DEPTH
#undef AVL_NULL
#undef AVL_GET_LESS
#undef AVL_GET_GREATER
#undef AVL_SET_LESS
#undef AVL_SET_GREATER
#undef AVL_GET_BALANCE_FACTOR
#undef AVL_SET_BALANCE_FACTOR
#undef AVL_SET_ROOT
#undef AVL_COMPARE_KEY_KEY
#undef AVL_COMPARE_KEY_NODE
#undef AVL_COMPARE_NODE_NODE

View file

@ -8,6 +8,8 @@
#include <machine/archtypes.h>
#include "vm.h"
#include "physravl.h"
#include "yieldedavl.h"
struct vmproc;
@ -28,7 +30,7 @@ struct vmproc {
/* Regions in virtual address space. */
struct vir_region *vm_regions;
int vm_count;
yielded_avl vm_yielded_blocks; /* avl of yielded physblocks */
/* Heap for brk() to extend. */
struct vir_region *vm_heap;

20
servers/vm/yielded.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef _YIELDED_H
#define _YIELDED_H 1
#include <minix/type.h>
/* Bookkeeping record for one block of memory that a process has yielded
 * to VM. Each record is a node in a per-process AVL tree keyed on 'id'
 * and is also linked into an LRU list through 'younger'/'older'.
 */
typedef struct yielded {
/* Identifier chosen by the yielding process; the AVL key. */
u64_t id;
/* Physical address and length in bytes of the yielded memory. */
phys_bytes addr, len;
/* Endpoint of the process that yielded the block. */
endpoint_t owner;
/* LRU fields */
struct yielded *younger, *older;
/* AVL fields */
struct yielded *less, *greater;
int factor;
} yielded_t;
#endif

11
servers/vm/yieldedavl.c Normal file
View file

@ -0,0 +1,11 @@
#include <stddef.h>
#include <minix/u64.h>
#include "proto.h"
#include "sanitycheck.h"
#include "yielded.h"
#include "yieldedavl_defs.h"
#include "cavl_if.h"
#include "cavl_impl.h"

10
servers/vm/yieldedavl.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef _YIELDEDAVL_H
#define _YIELDEDAVL_H
#include "yielded.h"
#include "yieldedavl_defs.h"
#include "cavl_if.h"
#include "unavl.h"
#endif

View file

@ -0,0 +1,18 @@
#include <minix/u64.h>
/* Configuration of the generic AVL code (cavl_if.h / cavl_impl.h) for a
 * tree of yielded_t nodes keyed on their 64-bit id.
 */
/* Prefix for every generated identifier, e.g. yielded_search. */
#define AVL_UNIQUE(id) yielded_ ## id
#define AVL_HANDLE yielded_t *
#define AVL_KEY u64_t
#define AVL_MAX_DEPTH 30 /* good for 2 million nodes */
#define AVL_NULL NULL
#define AVL_GET_LESS(h, a) (h)->less
#define AVL_GET_GREATER(h, a) (h)->greater
/* The setters write through USE() and expand to complete statements
 * (note the trailing semicolons).
 */
#define AVL_SET_LESS(h1, h2) USE((h1), (h1)->less = h2;);
#define AVL_SET_GREATER(h1, h2) USE((h1), (h1)->greater = h2;);
#define AVL_GET_BALANCE_FACTOR(h) (h)->factor
#define AVL_SET_BALANCE_FACTOR(h, f) USE((h), (h)->factor = f;);
#define AVL_SET_ROOT(h, v) (h)->root = v;
/* Keys are 64-bit ids, compared with cmp64(). */
#define AVL_COMPARE_KEY_KEY(k1, k2) cmp64((k1), (k2))
#define AVL_COMPARE_KEY_NODE(k, h) AVL_COMPARE_KEY_KEY((k), (h)->id)
#define AVL_COMPARE_NODE_NODE(h1, h2) AVL_COMPARE_KEY_KEY((h1)->id, (h2)->id)
/* NOTE(review): extra member for the generated tree struct; the reason for
 * the 4 bytes of padding is not visible here - confirm.
 */
#define AVL_INSIDE_STRUCT char pad[4];