diff --git a/commands/service/parse.c b/commands/service/parse.c index a18ebef3e..ea805a073 100644 --- a/commands/service/parse.c +++ b/commands/service/parse.c @@ -732,6 +732,8 @@ struct { "RS_UPDATE", VM_RS_UPDATE }, { "RS_MEMCTL", VM_RS_MEMCTL }, { "PROCCTL", VM_PROCCTL }, + { "MAPCACHEPAGE", VM_MAPCACHEPAGE }, + { "SETCACHEPAGE", VM_SETCACHEPAGE }, { NULL, 0 }, }; diff --git a/distrib/sets/lists/minix/mi b/distrib/sets/lists/minix/mi index bde486f80..4ae4a5feb 100644 --- a/distrib/sets/lists/minix/mi +++ b/distrib/sets/lists/minix/mi @@ -4634,14 +4634,14 @@ ./usr/tests/minix-posix/test70 minix-sys ./usr/tests/minix-posix/test71 minix-sys ./usr/tests/minix-posix/test72 minix-sys -./usr/tests/minix-posix/test73 minix-sys obsolete +./usr/tests/minix-posix/test73 minix-sys ./usr/tests/minix-posix/test7 minix-sys ./usr/tests/minix-posix/test8 minix-sys ./usr/tests/minix-posix/test9 minix-sys ./usr/tests/minix-posix/testinterp minix-sys ./usr/tests/minix-posix/testsh1 minix-sys ./usr/tests/minix-posix/testsh2 minix-sys -./usr/tests/minix-posix/testvm minix-sys obsolete +./usr/tests/minix-posix/testvm minix-sys ./usr/tests/minix-posix/testvm.conf minix-sys ./usr/tmp minix-sys ./usr/var minix-sys diff --git a/etc/system.conf b/etc/system.conf index 022492521..496888114 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -107,7 +107,7 @@ service mfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -134,7 +134,7 @@ service ext2 { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -147,7 +147,7 @@ service pfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm BASIC; # Only basic VM calls allowed + vm MAPCACHEPAGE SETCACHEPAGE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS diff --git a/include/minix/com.h b/include/minix/com.h index 1daa0409a..f92a567b0 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -993,12 +993,31 @@ /* To VM: map in cache block by FS */ #define VM_MAPCACHEPAGE (VM_RQ_BASE+26) +/* To VM: identify cache block in FS */ +#define VM_SETCACHEPAGE (VM_RQ_BASE+27) + +/* To VFS: fields for request from VM. */ +# define VFS_VMCALL_REQ m10_i1 +# define VFS_VMCALL_FD m10_i2 +# define VFS_VMCALL_REQID m10_i3 +# define VFS_VMCALL_ENDPOINT m10_i4 +# define VFS_VMCALL_OFFSET_LO m10_l1 +# define VFS_VMCALL_OFFSET_HI m10_l2 +# define VFS_VMCALL_LENGTH m10_l3 + +/* Request codes to from VM to VFS */ +#define VMVFSREQ_FDLOOKUP 101 +#define VMVFSREQ_FDCLOSE 102 +#define VMVFSREQ_FDIO 103 + /* Calls from VFS. 
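 * VFS answers a request from VM (see VFS_VMCALL_* above) with a single
 * VM_VFS_REPLY message: VMV_REQID echoes the request id, VMV_RESULT
 * carries the outcome, and VMV_DEV/VMV_INO/VMV_FD identify the file.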
*/ -# define VMV_ENDPOINT m1_i1 /* for all VM_VFS_REPLY_* */ -#define VM_VFS_REPLY_OPEN (VM_RQ_BASE+30) -# define VMVRO_FD m1_i2 -#define VM_VFS_REPLY_MMAP (VM_RQ_BASE+31) -#define VM_VFS_REPLY_CLOSE (VM_RQ_BASE+32) +#define VM_VFS_REPLY (VM_RQ_BASE+30) +# define VMV_ENDPOINT m10_i1 +# define VMV_RESULT m10_i2 +# define VMV_REQID m10_i3 +# define VMV_DEV m10_i4 +# define VMV_INO m10_l1 +# define VMV_FD m10_l2 #define VM_REMAP (VM_RQ_BASE+33) # define VMRE_D m1_i1 diff --git a/include/minix/ipc.h b/include/minix/ipc.h index a0f881365..fc61d6c09 100644 --- a/include/minix/ipc.h +++ b/include/minix/ipc.h @@ -30,6 +30,30 @@ typedef struct {long m9l1, m9l2, m9l3, m9l4, m9l5; typedef struct {int m10i1, m10i2, m10i3, m10i4; long m10l1, m10l2, m10l3; } mess_10; +typedef struct { + void *block; + u32_t dev_offset_pages; + u32_t ino_offset_pages; + u32_t ino; + u32_t *flags_ptr; + u32_t dev; + u8_t pages; + u8_t flags; +} mess_vmmcp __packed; + +typedef struct { + endpoint_t who; + u32_t offset; + u32_t dev; + u32_t ino; + u32_t vaddr; + u32_t len; + u16_t fd; + u16_t clearend_and_flags; /* low 12 bits are clearend, rest flags */ +} mess_vm_vfs_mmap __packed; + +typedef struct { u8_t flags; void *addr; } mess_vmmcp_reply __packed; + typedef struct { endpoint_t m_source; /* who sent the message */ int m_type; /* what kind of message is it */ @@ -44,6 +68,9 @@ typedef struct { mess_6 m_m6; mess_9 m_m9; mess_10 m_m10; + mess_vmmcp m_vmmcp; + mess_vmmcp_reply m_vmmcp_reply; + mess_vm_vfs_mmap m_vm_vfs; } m_u; } message __aligned(16); diff --git a/include/minix/libminixfs.h b/include/minix/libminixfs.h index bbeda8877..0be011735 100644 --- a/include/minix/libminixfs.h +++ b/include/minix/libminixfs.h @@ -17,9 +17,16 @@ struct buf { struct buf *lmfs_hash; /* used to link bufs on hash chains */ block_t lmfs_blocknr; /* block number of its (minor) device */ dev_t lmfs_dev; /* major | minor device where block resides */ - char lmfs_dirt; /* BP_CLEAN or BP_DIRTY */ char lmfs_count; /* number of users of this buffer */ + char lmfs_needsetcache; /* to be identified to VM */ unsigned int lmfs_bytes; /* Number of bytes allocated in bp */ + u32_t lmfs_flags; /* Flags shared between VM and FS */ + + /* If any, which inode & offset does this block correspond to? 
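 * The (lmfs_inode, lmfs_inode_offset) pair lets VM index the cached
 * page by file position as well as by device position.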
+ * If none, VMC_NO_INODE + */ + ino_t lmfs_inode; + u64_t lmfs_inode_offset; }; int fs_lookup_credentials(vfs_ucred_t *credentials, @@ -42,10 +49,13 @@ void lmfs_reset_rdwt_err(void); int lmfs_rdwt_err(void); void lmfs_buf_pool(int new_nr_bufs); struct buf *lmfs_get_block(dev_t dev, block_t block,int only_search); +struct buf *lmfs_get_block_ino(dev_t dev, block_t block,int only_search, + ino_t ino, u64_t off); void lmfs_invalidate(dev_t device); void lmfs_put_block(struct buf *bp, int block_type); void lmfs_rw_scattered(dev_t, struct buf **, int, int); void lmfs_setquiet(int q); +int lmfs_do_bpeek(message *); /* calls that libminixfs does into fs */ void fs_blockstats(u32_t *blocks, u32_t *free, u32_t *used); diff --git a/include/minix/vm.h b/include/minix/vm.h index acb0a901a..698905954 100644 --- a/include/minix/vm.h +++ b/include/minix/vm.h @@ -65,5 +65,20 @@ int vm_info_region(endpoint_t who, struct vm_region_info *vri, int count, vir_bytes *next); int vm_procctl(endpoint_t ep, int param); +int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize); + +void *vm_map_cacheblock(u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize); + +/* flags for vm cache functions */ +#define VMMC_FLAGS_LOCKED 0x01 /* someone is updating the flags; don't read/write */ +#define VMMC_DIRTY 0x02 /* dirty buffer and it may not be evicted */ +#define VMMC_EVICTED 0x04 /* VM has evicted the buffer and it's invalid */ +#define VMMC_BLOCK_LOCKED 0x08 /* client is using it and it may not be evicted */ + +/* special inode number for vm cache functions */ +#define VMC_NO_INODE 0 /* to reference a disk block, no associated file */ + #endif /* _MINIX_VM_H */ diff --git a/lib/libminixfs/Makefile b/lib/libminixfs/Makefile index ade070374..ae0324ebc 100644 --- a/lib/libminixfs/Makefile +++ b/lib/libminixfs/Makefile @@ -1,6 +1,5 @@ # Makefile for libminixfs .include - LIB= minixfs SRCS= fetch_credentials.c cache.c diff --git a/lib/libminixfs/cache.c b/lib/libminixfs/cache.c index 37d7d2901..eb55bc808 100644 --- a/lib/libminixfs/cache.c +++ b/lib/libminixfs/cache.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ -16,9 +16,6 @@ #include #include -#define BP_CLEAN 0 /* on-disk block and memory copies identical */ -#define BP_DIRTY 1 /* on-disk block and memory copies differ */ - #define BUFHASH(b) ((b) % nr_bufs) #define MARKCLEAN lmfs_markclean @@ -31,6 +28,7 @@ static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ static void rm_lru(struct buf *bp); static void read_block(struct buf *); static void flushall(dev_t dev); +static void freeblock(struct buf *bp); static int vmcache = 0; /* are we using vm's secondary cache? 
(initially not) */ @@ -58,13 +56,6 @@ u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u32_t bfree, bused = btotal-bfree; - /* but we simply need minbufs no matter what, and we don't - * want more than that if we're a memory device - */ - if(majordev == MEMORY_MAJOR) { - return minbufs; - } - /* set a reasonable cache size; cache at most a certain * portion of the used FS, and at most a certain %age of remaining * memory @@ -101,19 +92,19 @@ u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u32_t bfree, void lmfs_markdirty(struct buf *bp) { - bp->lmfs_dirt = BP_DIRTY; + bp->lmfs_flags |= VMMC_DIRTY; } void lmfs_markclean(struct buf *bp) { - bp->lmfs_dirt = BP_CLEAN; + bp->lmfs_flags &= ~VMMC_DIRTY; } int lmfs_isclean(struct buf *bp) { - return bp->lmfs_dirt == BP_CLEAN; + return !(bp->lmfs_flags & VMMC_DIRTY); } dev_t @@ -127,14 +118,109 @@ int lmfs_bytes(struct buf *bp) return bp->lmfs_bytes; } +static void +free_unused_blocks(void) +{ + struct buf *bp; + + int freed = 0, bytes = 0; + printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use); + for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) { + freed++; + bytes += bp->lmfs_bytes; + freeblock(bp); + } + } + printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes); +} + +static void +lmfs_alloc_block(struct buf *bp) +{ + ASSERT(!bp->data); + ASSERT(bp->lmfs_bytes == 0); + ASSERT(!(fs_block_size % PAGE_SIZE)); + if((bp->data = minix_mmap(0, fs_block_size, + PROT_READ|PROT_WRITE, MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { + free_unused_blocks(); + if((bp->data = minix_mmap(0, fs_block_size, PROT_READ|PROT_WRITE, + MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { + panic("libminixfs: could not allocate block"); + } + } + assert(bp->data); + bp->lmfs_bytes = fs_block_size; + bp->lmfs_needsetcache = 1; +} + /*===========================================================================* * lmfs_get_block * *===========================================================================*/ -struct buf *lmfs_get_block( - register dev_t dev, /* on which device is the block? */ - register block_t block, /* which block is wanted? */ - int only_search /* if NO_READ, don't read, else act normal */ -) +struct buf *lmfs_get_block(register dev_t dev, register block_t block, + int only_search) +{ + return lmfs_get_block_ino(dev, block, only_search, VMC_NO_INODE, 0); +} + +void minix_munmap_t(void *a, int len) +{ + vir_bytes av = (vir_bytes) a; + assert(a); + assert(a != MAP_FAILED); + assert(len > 0); + assert(!(len % PAGE_SIZE)); + assert(!(av % PAGE_SIZE)); + + if(minix_munmap(a, len) < 0) + panic("libminixfs cache: munmap failed"); +} + +static void raisecount(struct buf *bp) +{ + assert(bufs_in_use >= 0); + ASSERT(bp->lmfs_count >= 0); + bp->lmfs_count++; + if(bp->lmfs_count == 1) bufs_in_use++; + assert(bufs_in_use > 0); +} + +static void lowercount(struct buf *bp) +{ + assert(bufs_in_use > 0); + ASSERT(bp->lmfs_count > 0); + bp->lmfs_count--; + if(bp->lmfs_count == 0) bufs_in_use--; + assert(bufs_in_use >= 0); +} + +static void freeblock(struct buf *bp) +{ + ASSERT(bp->lmfs_count == 0); + /* If the block taken is dirty, make it clean by writing it to the disk. + * Avoid hysteresis by flushing all other dirty blocks for the same device. + */ + if (bp->lmfs_dev != NO_DEV) { + if (!lmfs_isclean(bp)) flushall(bp->lmfs_dev); + assert(bp->lmfs_bytes == fs_block_size); + bp->lmfs_dev = NO_DEV; + } + + /* Fill in block's parameters and add it to the hash chain where it goes. 
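 * (In freeblock() this amounts to marking the block clean and
 * unmapping its data page, if one is attached.)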
*/ + MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ + if(bp->lmfs_bytes > 0) { + assert(bp->data); + minix_munmap_t(bp->data, bp->lmfs_bytes); + bp->lmfs_bytes = 0; + bp->data = NULL; + } else assert(!bp->data); +} + +/*===========================================================================* + * lmfs_get_block_ino * + *===========================================================================*/ +struct buf *lmfs_get_block_ino(dev_t dev, block_t block, int only_search, + ino_t ino, u64_t ino_off) { /* Check to see if the requested block is in the block cache. If so, return * a pointer to it. If not, evict some other block and fetch it (unless @@ -152,8 +238,9 @@ struct buf *lmfs_get_block( */ int b; - static struct buf *bp, *prev_ptr; - u64_t yieldid = VM_BLOCKID_NONE /*, getid = make64(dev, block) */; + static struct buf *bp; + u64_t dev_off = (u64_t) block * fs_block_size; + struct buf *prev_ptr; assert(buf_hash); assert(buf); @@ -163,22 +250,52 @@ struct buf *lmfs_get_block( assert(dev != NO_DEV); - /* Search the hash chain for (dev, block). Do_read() can use - * lmfs_get_block(NO_DEV ...) to get an unnamed block to fill with zeros when - * someone wants to read from a hole in a file, in which case this search - * is skipped - */ + if((ino_off % fs_block_size)) { + + printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n", + ino_off); + util_stacktrace(); + } + + /* Search the hash chain for (dev, block). */ b = BUFHASH(block); bp = buf_hash[b]; while (bp != NULL) { if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) { + if(bp->lmfs_flags & VMMC_EVICTED) { + /* We had it but VM evicted it; invalidate it. */ + ASSERT(bp->lmfs_count == 0); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); + bp->lmfs_dev = NO_DEV; + bp->lmfs_bytes = 0; + bp->data = NULL; + break; + } + ASSERT(bp->lmfs_needsetcache == 0); /* Block needed has been found. */ - if (bp->lmfs_count == 0) rm_lru(bp); - bp->lmfs_count++; /* record that block is in use */ + if (bp->lmfs_count == 0) { + rm_lru(bp); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + bp->lmfs_flags |= VMMC_BLOCK_LOCKED; + } + raisecount(bp); ASSERT(bp->lmfs_bytes == fs_block_size); ASSERT(bp->lmfs_dev == dev); ASSERT(bp->lmfs_dev != NO_DEV); + ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); ASSERT(bp->data); + + if(ino != VMC_NO_INODE) { + if(bp->lmfs_inode == VMC_NO_INODE + || bp->lmfs_inode != ino + || bp->lmfs_inode_offset != ino_off) { + bp->lmfs_inode = ino; + bp->lmfs_inode_offset = ino_off; + bp->lmfs_needsetcache = 1; + } + } + return(bp); } else { /* This block is not the one sought. */ @@ -186,29 +303,13 @@ struct buf *lmfs_get_block( } } - /* Desired block is not on available chain. Take oldest block ('front'). */ - if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); - - if(bp->lmfs_bytes < fs_block_size) { - ASSERT(!bp->data); - ASSERT(bp->lmfs_bytes == 0); - if(!(bp->data = alloc_contig( (size_t) fs_block_size, 0, NULL))) { - printf("fs cache: couldn't allocate a new block.\n"); - for(bp = front; - bp && bp->lmfs_bytes < fs_block_size; bp = bp->lmfs_next) - ; - if(!bp) { - panic("no buffer available"); - } - } else { - bp->lmfs_bytes = fs_block_size; - } + /* Desired block is not on available chain. Find a free block to use. 
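 * If the hash walk above turned up a VM-evicted entry, reuse that
 * slot directly; otherwise take the oldest block from the LRU front.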
*/ + if(bp) { + ASSERT(bp->lmfs_flags & VMMC_EVICTED); + } else { + if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); } - - ASSERT(bp); - ASSERT(bp->data); - ASSERT(bp->lmfs_bytes == fs_block_size); - ASSERT(bp->lmfs_count == 0); + assert(bp); rm_lru(bp); @@ -228,25 +329,17 @@ struct buf *lmfs_get_block( } } - /* If the block taken is dirty, make it clean by writing it to the disk. - * Avoid hysteresis by flushing all other dirty blocks for the same device. - */ - if (bp->lmfs_dev != NO_DEV) { - if (bp->lmfs_dirt == BP_DIRTY) flushall(bp->lmfs_dev); + freeblock(bp); - /* Are we throwing out a block that contained something? - * Give it to VM for the second-layer cache. - */ - yieldid = make64(bp->lmfs_dev, bp->lmfs_blocknr); - assert(bp->lmfs_bytes == fs_block_size); - bp->lmfs_dev = NO_DEV; - } + bp->lmfs_inode = ino; + bp->lmfs_inode_offset = ino_off; - /* Fill in block's parameters and add it to the hash chain where it goes. */ - MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ + bp->lmfs_flags = VMMC_BLOCK_LOCKED; + bp->lmfs_needsetcache = 0; bp->lmfs_dev = dev; /* fill in device number */ bp->lmfs_blocknr = block; /* fill in block number */ - bp->lmfs_count++; /* record that block is being used */ + ASSERT(bp->lmfs_count == 0); + raisecount(bp); b = BUFHASH(bp->lmfs_blocknr); bp->lmfs_hash = buf_hash[b]; @@ -254,23 +347,26 @@ struct buf *lmfs_get_block( assert(dev != NO_DEV); - /* Go get the requested block unless searching or prefetching. */ - if(only_search == PREFETCH || only_search == NORMAL) { - /* Block is not found in our cache, but we do want it - * if it's in the vm cache. - */ - if(vmcache) { - /* If we can satisfy the PREFETCH or NORMAL request - * from the vm cache, work is done. - */ -#if 0 - if(vm_yield_block_get_block(yieldid, getid, - bp->data, fs_block_size) == OK) { - return bp; - } -#endif + /* Block is not found in our cache, but we do want it + * if it's in the vm cache. + */ + assert(!bp->data); + assert(!bp->lmfs_bytes); + if(vmcache) { + if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off, + &bp->lmfs_flags, fs_block_size)) != MAP_FAILED) { + bp->lmfs_bytes = fs_block_size; + ASSERT(!bp->lmfs_needsetcache); + return bp; } } + bp->data = NULL; + + /* Not in the cache; reserve memory for its contents. */ + + lmfs_alloc_block(bp); + + assert(bp->data); if(only_search == PREFETCH) { /* PREFETCH: don't do i/o. */ @@ -278,15 +374,7 @@ struct buf *lmfs_get_block( } else if (only_search == NORMAL) { read_block(bp); } else if(only_search == NO_READ) { - /* we want this block, but its contents - * will be overwritten. VM has to forget - * about it. - */ -#if 0 - if(vmcache) { - vm_forgetblock(getid); - } -#endif + /* This block will be overwritten by new contents. */ } else panic("unexpected only_search value: %d", only_search); @@ -310,15 +398,21 @@ int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ * the integrity of the file system (e.g., inode blocks) are written to * disk immediately if they are dirty. */ + dev_t dev; + u64_t dev_off; + int r; + if (bp == NULL) return; /* it is easier to check here than in caller */ - bp->lmfs_count--; /* there is one use fewer now */ + dev = bp->lmfs_dev; + + dev_off = (u64_t) bp->lmfs_blocknr * fs_block_size; + + lowercount(bp); if (bp->lmfs_count != 0) return; /* block is still in use */ - bufs_in_use--; /* one fewer block buffers in use */ - /* Put this block back on the LRU chain. 
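 * RAM-disk and one-shot blocks go on the front, making them the first
 * eviction candidates; all other blocks go on the rear.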
*/ - if (bp->lmfs_dev == DEV_RAM || (block_type & ONE_SHOT)) { + if (dev == DEV_RAM || (block_type & ONE_SHOT)) { /* Block probably won't be needed quickly. Put it on front of chain. * It will be the next block to be evicted from the cache. */ @@ -342,6 +436,25 @@ int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ rear->lmfs_next = bp; rear = bp; } + + assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED); + bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED; + + /* block has sensible content - if necesary, identify it to VM */ + if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { + if((r=vm_set_cacheblock(bp->data, dev, dev_off, + bp->lmfs_inode, bp->lmfs_inode_offset, + &bp->lmfs_flags, fs_block_size)) != OK) { + if(r == ENOSYS) { + printf("libminixfs: ENOSYS, disabling VM calls\n"); + vmcache = 0; + } else { + panic("libminixfs: setblock of 0x%lx dev 0x%x off " + "0x%llx failed\n", bp->data, dev, dev_off); + } + } + } + bp->lmfs_needsetcache = 0; } /*===========================================================================* @@ -363,9 +476,28 @@ register struct buf *bp; /* buffer pointer */ assert(dev != NO_DEV); + ASSERT(bp->lmfs_bytes == fs_block_size); + ASSERT(fs_block_size > 0); + ASSERT(!(fs_block_size % PAGE_SIZE)); + pos = mul64u(bp->lmfs_blocknr, fs_block_size); - r = bdev_read(dev, pos, bp->data, fs_block_size, - BDEV_NOFLAGS); + if(fs_block_size > PAGE_SIZE) { +#define MAXPAGES 20 + vir_bytes vaddr = (vir_bytes) bp->data; + int p; + static iovec_t iovec[MAXPAGES]; + int pages = fs_block_size/PAGE_SIZE; + ASSERT(pages > 1 && pages < MAXPAGES); + for(p = 0; p < pages; p++) { + iovec[p].iov_addr = vaddr; + iovec[p].iov_size = PAGE_SIZE; + vaddr += PAGE_SIZE; + } + r = bdev_gather(dev, pos, iovec, pages, BDEV_NOFLAGS); + } else { + r = bdev_read(dev, pos, bp->data, fs_block_size, + BDEV_NOFLAGS); + } if (r < 0) { printf("fs cache: I/O error on device %d/%d, block %u\n", major(dev), minor(dev), bp->lmfs_blocknr); @@ -381,6 +513,7 @@ register struct buf *bp; /* buffer pointer */ /* Report read errors to interested parties. */ rdwt_err = r; } + } /*===========================================================================* @@ -394,10 +527,16 @@ void lmfs_invalidate( register struct buf *bp; - for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) - if (bp->lmfs_dev == device) bp->lmfs_dev = NO_DEV; - - /* vm_forgetblocks(); */ + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + if (bp->lmfs_dev == device) { + assert(bp->data); + assert(bp->lmfs_bytes > 0); + minix_munmap_t(bp->data, bp->lmfs_bytes); + bp->lmfs_dev = NO_DEV; + bp->lmfs_bytes = 0; + bp->data = NULL; + } + } } /*===========================================================================* @@ -423,7 +562,7 @@ static void flushall(dev_t dev) } for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) { - if (bp->lmfs_dirt == BP_DIRTY && bp->lmfs_dev == dev) { + if (!lmfs_isclean(bp) && bp->lmfs_dev == dev) { dirty[ndirty++] = bp; } } @@ -449,16 +588,22 @@ void lmfs_rw_scattered( register iovec_t *iop; static iovec_t *iovec = NULL; u64_t pos; - int j, r; + int iov_per_block; STATICINIT(iovec, NR_IOREQS); + assert(dev != NO_DEV); + assert(!(fs_block_size % PAGE_SIZE)); + assert(fs_block_size > 0); + iov_per_block = fs_block_size / PAGE_SIZE; + /* (Shell) sort buffers on lmfs_blocknr. 
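 * Sorting makes runs of consecutive blocks adjacent, so that each run
 * can be issued below as one scattered request of page-sized iovecs.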
*/ gap = 1; do gap = 3 * gap + 1; while (gap <= bufqsize); while (gap != 1) { + int j; gap /= 3; for (j = gap; j < bufqsize; j++) { for (i = j - gap; @@ -475,17 +620,33 @@ void lmfs_rw_scattered( * went fine, otherwise the error code for the first failed transfer. */ while (bufqsize > 0) { - for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) { - bp = bufq[j]; - if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + j) break; - iop->iov_addr = (vir_bytes) bp->data; - iop->iov_size = (vir_bytes) fs_block_size; + int nblocks = 0, niovecs = 0; + int r; + for (iop = iovec; nblocks < bufqsize; nblocks++) { + int p; + vir_bytes vdata; + bp = bufq[nblocks]; + if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks) + break; + if(niovecs >= NR_IOREQS-iov_per_block) break; + vdata = (vir_bytes) bp->data; + for(p = 0; p < iov_per_block; p++) { + iop->iov_addr = vdata; + iop->iov_size = PAGE_SIZE; + vdata += PAGE_SIZE; + iop++; + niovecs++; + } } + + assert(nblocks > 0); + assert(niovecs > 0); + pos = mul64u(bufq[0]->lmfs_blocknr, fs_block_size); if (rw_flag == READING) - r = bdev_gather(dev, pos, iovec, j, BDEV_NOFLAGS); + r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); else - r = bdev_scatter(dev, pos, iovec, j, BDEV_NOFLAGS); + r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); /* Harvest the results. The driver may have returned an error, or it * may have done less than what we asked for. @@ -494,13 +655,12 @@ void lmfs_rw_scattered( printf("fs cache: I/O error %d on device %d/%d, block %u\n", r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); } - for (i = 0; i < j; i++) { + for (i = 0; i < nblocks; i++) { bp = bufq[i]; if (r < (ssize_t) fs_block_size) { /* Transfer failed. */ if (i == 0) { bp->lmfs_dev = NO_DEV; /* Invalidate block */ - /* vm_forgetblocks(); */ } break; } @@ -512,8 +672,8 @@ void lmfs_rw_scattered( } r -= fs_block_size; } - bufq += i; - bufqsize -= i; + bufq += nblocks; + bufqsize -= nblocks; if (rw_flag == READING) { /* Don't bother reading more than the device is willing to * give at this time. Don't forget to release those extras. @@ -543,7 +703,6 @@ struct buf *bp; /* Remove a block from its LRU chain. 
*/ struct buf *next_ptr, *prev_ptr; - bufs_in_use++; next_ptr = bp->lmfs_next; /* successor on LRU chain */ prev_ptr = bp->lmfs_prev; /* predecessor on LRU chain */ if (prev_ptr != NULL) @@ -599,13 +758,10 @@ void lmfs_set_blocksize(int new_block_size, int major) * - our main FS device isn't a memory device */ -#if 0 vmcache = 0; - if(vm_forgetblock(VM_BLOCKID_NONE) != ENOSYS && - may_use_vmcache && major != MEMORY_MAJOR) { + + if(may_use_vmcache) vmcache = 1; - } -#endif } /*===========================================================================* @@ -624,7 +780,7 @@ void lmfs_buf_pool(int new_nr_bufs) for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { if(bp->data) { assert(bp->lmfs_bytes > 0); - free_contig(bp->data, bp->lmfs_bytes); + minix_munmap_t(bp->data, bp->lmfs_bytes); } } } @@ -659,8 +815,6 @@ void lmfs_buf_pool(int new_nr_bufs) for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next; buf_hash[0] = front; - - /* vm_forgetblocks(); */ } int lmfs_bufs_in_use(void) @@ -677,7 +831,7 @@ void lmfs_flushall(void) { struct buf *bp; for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) - if(bp->lmfs_dev != NO_DEV && bp->lmfs_dirt == BP_DIRTY) + if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) flushall(bp->lmfs_dev); } @@ -700,3 +854,4 @@ int lmfs_rdwt_err(void) { return rdwt_err; } + diff --git a/lib/libsys/Makefile b/lib/libsys/Makefile index 0d4d43785..490f62e75 100644 --- a/lib/libsys/Makefile +++ b/lib/libsys/Makefile @@ -79,6 +79,7 @@ SRCS+= \ tickdelay.c \ timers.c \ vm_brk.c \ + vm_cache.c \ vm_exit.c \ vm_fork.c \ vm_info.c \ diff --git a/lib/libsys/vm_cache.c b/lib/libsys/vm_cache.c new file mode 100644 index 000000000..73883203c --- /dev/null +++ b/lib/libsys/vm_cache.c @@ -0,0 +1,62 @@ + +#include "syslib.h" + +#include +#include + +#include +#include +#include +#include + +int vm_cachecall(message *m, int call, void *addr, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + if(blocksize % PAGE_SIZE) + panic("blocksize %d should be a multiple of pagesize %d\n", + blocksize, PAGE_SIZE); + + if(ino_offset % PAGE_SIZE) + panic("inode offset %d should be a multiple of pagesize %d\n", + ino_offset, PAGE_SIZE); + + if(dev_offset % PAGE_SIZE) + panic("dev offset offset %d should be a multiple of pagesize %d\n", + dev_offset, PAGE_SIZE); + + memset(m, 0, sizeof(*m)); + + assert(dev != NO_DEV); + + m->m_u.m_vmmcp.dev_offset_pages = dev_offset/PAGE_SIZE; + m->m_u.m_vmmcp.ino_offset_pages = ino_offset/PAGE_SIZE; + m->m_u.m_vmmcp.ino = ino; + m->m_u.m_vmmcp.block = addr; + m->m_u.m_vmmcp.flags_ptr = flags; + m->m_u.m_vmmcp.dev = dev; + m->m_u.m_vmmcp.pages = blocksize / PAGE_SIZE; + m->m_u.m_vmmcp.flags = 0; + + return _taskcall(VM_PROC_NR, call, m); +} + +void *vm_map_cacheblock(u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + message m; + + if(vm_cachecall(&m, VM_MAPCACHEPAGE, NULL, dev, dev_offset, + ino, ino_offset, flags, blocksize) != OK) + return MAP_FAILED; + + return m.m_u.m_vmmcp_reply.addr; +} + +int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset, + u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize) +{ + message m; + + return vm_cachecall(&m, VM_SETCACHEPAGE, block, dev, dev_offset, + ino, ino_offset, flags, blocksize); +} diff --git a/servers/vm/Makefile b/servers/vm/Makefile index 1db8f8288..ea9a67f37 100644 --- a/servers/vm/Makefile +++ b/servers/vm/Makefile @@ -5,7 +5,8 @@ PROG= vm SRCS= main.c alloc.c utility.c exit.c fork.c break.c \ mmap.c slaballoc.c region.c 
pagefaults.c \ rs.c queryexit.c pb.c regionavl.c \ - mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c + mem_anon.c mem_directphys.c mem_anon_contig.c mem_shared.c \ + mem_cache.c cache.c .if ${MACHINE_ARCH} == "earm" LDFLAGS+= -T ${.CURDIR}/arch/${MACHINE_ARCH}/vm.lds diff --git a/servers/vm/alloc.c b/servers/vm/alloc.c index 658cbd2c5..09c252fca 100644 --- a/servers/vm/alloc.c +++ b/servers/vm/alloc.c @@ -257,11 +257,9 @@ phys_clicks alloc_mem(phys_clicks clicks, u32_t memflags) clicks += align_clicks; } - mem = alloc_pages(clicks, memflags); - if(mem == NO_MEM) { - /* free_yielded(clicks * CLICK_SIZE); */ - mem = alloc_pages(clicks, memflags); - } + do { + mem = alloc_pages(clicks, memflags); + } while(mem == NO_MEM && cache_freepages(clicks) > 0); if(mem == NO_MEM) return mem; diff --git a/servers/vm/break.c b/servers/vm/break.c index 0c38eab73..7ecaa41cb 100644 --- a/servers/vm/break.c +++ b/servers/vm/break.c @@ -65,8 +65,9 @@ int real_brk(vmp, v) struct vmproc *vmp; vir_bytes v; { - if(map_region_extend_upto_v(vmp, v) == OK) + if(map_region_extend_upto_v(vmp, v) == OK) { return OK; + } return(ENOMEM); } diff --git a/servers/vm/cache.c b/servers/vm/cache.c new file mode 100644 index 000000000..fd6be5ab3 --- /dev/null +++ b/servers/vm/cache.c @@ -0,0 +1,311 @@ + +/* File that implements the data structure, insert, lookup and remove + * functions for file system cache blocks. + * + * Cache blocks can be mapped into the memory of processes by the + * 'cache' and 'file' memory types. + */ + +#include +#include + +#include + +#include "proto.h" +#include "vm.h" +#include "region.h" +#include "glo.h" +#include "cache.h" + +/* cache datastructure */ +#define HASHSIZE 65536 + +static struct cached_page *cache_hash_bydev[HASHSIZE]; +static struct cached_page *cache_hash_byino[HASHSIZE]; +static struct cached_page *lru_oldest = NULL, *lru_newest = NULL; + +static u32_t cached_pages = 0; + +static void lru_rm(struct cached_page *hb) +{ + struct cached_page *newer = hb->newer, *older = hb->older; + assert(lru_newest); + assert(lru_oldest); + if(newer) { + assert(newer->older == hb); + newer->older = older; + } + if(older) { + assert(older->newer == hb); + older->newer = newer; + } + + if(lru_newest == hb) { assert(!newer); lru_newest = older; } + if(lru_oldest == hb) { assert(!older); lru_oldest = newer; } + + if(lru_newest) assert(lru_newest->newer == NULL); + if(lru_oldest) assert(lru_oldest->older == NULL); + + cached_pages--; +} + +static void lru_add(struct cached_page *hb) +{ + if(lru_newest) { + assert(lru_oldest); + assert(!lru_newest->newer); + lru_newest->newer = hb; + } else { + assert(!lru_oldest); + lru_oldest = hb; + } + + hb->older = lru_newest; + hb->newer = NULL; + lru_newest = hb; + + cached_pages++; +} + +void cache_lru_touch(struct cached_page *hb) +{ + lru_rm(hb); + lru_add(hb); +} + +static __inline u32_t makehash(u32_t p1, u64_t p2) +{ + u32_t offlo = ex64lo(p2), offhi = ex64hi(p2), + v = 0x12345678; + hash_mix(p1, offlo, offhi); + hash_final(offlo, offhi, v); + + return v % HASHSIZE; +} + +#if CACHE_SANITY +void cache_sanitycheck_internal(void) +{ + int h; + int n = 0; + int byino = 0; + int withino = 0; + int bydev_total = 0, lru_total = 0; + struct cached_page *cp; + + for(h = 0; h < HASHSIZE; h++) { + for(cp = cache_hash_bydev[h]; cp; cp = cp->hash_next_dev) { + assert(cp->dev != NO_DEV); + assert(h == makehash(cp->dev, cp->dev_offset)); + assert(cp == find_cached_page_bydev(cp->dev, cp->dev_offset, cp->ino, cp->ino_offset)); + if(cp->ino != VMC_NO_INODE) 
withino++; + bydev_total++; + n++; + assert(n < 1500000); + } + for(cp = cache_hash_byino[h]; cp; cp = cp->hash_next_ino) { + assert(cp->dev != NO_DEV); + assert(cp->ino != VMC_NO_INODE); + assert(h == makehash(cp->ino, cp->ino_offset)); + byino++; + n++; + assert(n < 1500000); + } + } + + assert(byino == withino); + + if(lru_newest) { + assert(lru_oldest); + assert(!lru_newest->newer); + assert(!lru_oldest->older); + } else { + assert(!lru_oldest); + } + + for(cp = lru_oldest; cp; cp = cp->newer) { + struct cached_page *newer = cp->newer, + *older = cp->older; + if(newer) assert(newer->older == cp); + if(older) assert(older->newer == cp); + lru_total++; + } + + assert(lru_total == bydev_total); + + assert(lru_total == cached_pages); +} +#endif + +#define rmhash_f(fname, nextfield) \ +static void \ +fname(struct cached_page *cp, struct cached_page **head) \ +{ \ + struct cached_page *hb; \ + if(*head == cp) { *head = cp->nextfield; return; } \ + for(hb = *head; hb && cp != hb->nextfield; hb = hb->nextfield) ; \ + assert(hb); assert(hb->nextfield == cp); \ + hb->nextfield = cp->nextfield; \ + return; \ +} + +rmhash_f(rmhash_byino, hash_next_ino) +rmhash_f(rmhash_bydev, hash_next_dev) + +static void addcache_byino(struct cached_page *hb) +{ + int hv_ino = makehash(hb->ino, hb->ino_offset); + assert(hb->ino != VMC_NO_INODE); + hb->hash_next_ino = cache_hash_byino[hv_ino]; + cache_hash_byino[hv_ino] = hb; +} + +static void +update_inohash(struct cached_page *hb, ino_t ino, u64_t ino_off) +{ + assert(ino != VMC_NO_INODE); + if(hb->ino != VMC_NO_INODE) { + int h = makehash(hb->ino, hb->ino_offset); + rmhash_byino(hb, &cache_hash_byino[h]); + } + hb->ino = ino; + hb->ino_offset = ino_off; + addcache_byino(hb); +} + +struct cached_page * +find_cached_page_bydev(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, int touchlru) +{ + struct cached_page *hb; + + for(hb = cache_hash_bydev[makehash(dev, dev_off)]; hb; hb=hb->hash_next_dev) { + if(hb->dev == dev && hb->dev_offset == dev_off) { + if(ino != VMC_NO_INODE) { + if(hb->ino != ino || hb->ino_offset != ino_off) { + update_inohash(hb, ino, ino_off); + } + } + + if(touchlru) cache_lru_touch(hb); + + return hb; + } + } + + return NULL; +} + +struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino, u64_t ino_off, int touchlru) +{ + struct cached_page *hb; + + assert(ino != VMC_NO_INODE); + assert(dev != NO_DEV); + + for(hb = cache_hash_byino[makehash(ino, ino_off)]; hb; hb=hb->hash_next_ino) { + if(hb->dev == dev && hb->ino == ino && hb->ino_offset == ino_off) { + if(touchlru) cache_lru_touch(hb); + + return hb; + } + } + + return NULL; +} + +int addcache(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, struct phys_block *pb) +{ + int hv_dev; + struct cached_page *hb; + + if(pb->flags & PBF_INCACHE) { + printf("VM: already in cache\n"); + return EINVAL; + } + + if(!SLABALLOC(hb)) { + printf("VM: no memory for cache node\n"); + return ENOMEM; + } + + assert(dev != NO_DEV); +#if CACHE_SANITY + assert(!find_cached_page_bydev(dev, dev_off, ino, ino_off)); +#endif + + hb->dev = dev; + hb->dev_offset = dev_off; + hb->ino = ino; + hb->ino_offset = ino_off; + hb->page = pb; + hb->page->refcount++; /* block also referenced by cache now */ + hb->page->flags |= PBF_INCACHE; + + hv_dev = makehash(dev, dev_off); + + hb->hash_next_dev = cache_hash_bydev[hv_dev]; + cache_hash_bydev[hv_dev] = hb; + + if(hb->ino != VMC_NO_INODE) + addcache_byino(hb); + + lru_add(hb); + + return OK; +} + +void rmcache(struct cached_page *cp) +{ + struct phys_block *pb = 
cp->page; + int hv_dev = makehash(cp->dev, cp->dev_offset); + + assert(cp->page->flags & PBF_INCACHE); + + cp->page->flags &= ~PBF_INCACHE; + + rmhash_bydev(cp, &cache_hash_bydev[hv_dev]); + if(cp->ino != VMC_NO_INODE) { + int hv_ino = makehash(cp->ino, cp->ino_offset); + rmhash_byino(cp, &cache_hash_byino[hv_ino]); + } + + assert(cp->page->refcount >= 1); + cp->page->refcount--; + + lru_rm(cp); + + if(pb->refcount == 0) { + assert(pb->phys != MAP_NONE); + free_mem(ABS2CLICK(pb->phys), 1); + SLABFREE(pb); + } + + SLABFREE(cp); +} + +int cache_freepages(int pages) +{ + struct cached_page *cp, *newercp; + int freed = 0; + int oldsteps = 0; + int skips = 0; + + for(cp = lru_oldest; cp && freed < pages; cp = newercp) { + newercp = cp->newer; + assert(cp->page->refcount >= 1); + if(cp->page->refcount == 1) { + rmcache(cp); + freed++; + skips = 0; + } else skips++; + oldsteps++; + } + + return freed; +} + +void get_stats_info(struct vm_stats_info *vsi) +{ + vsi->vsi_cached = cached_pages; +} + diff --git a/servers/vm/cache.h b/servers/vm/cache.h new file mode 100644 index 000000000..581349af8 --- /dev/null +++ b/servers/vm/cache.h @@ -0,0 +1,21 @@ + +struct cached_page { + /* - The (dev, dev_offset) pair are unique; + * the (ino, ino_offset) pair is information and + * might be missing. duplicate do not make sense + * although it won't bother VM much. + * - dev must always be valid, i.e. not NO_DEV + * - ino may be unknown, i.e. VMC_NO_INODE + */ + dev_t dev; /* which dev is it on */ + u64_t dev_offset; /* offset within dev */ + + ino_t ino; /* which ino is it about */ + u64_t ino_offset; /* offset within ino */ + struct phys_block *page; /* page ptr */ + struct cached_page *older; /* older in lru chain */ + struct cached_page *newer; /* newer in lru chain */ + struct cached_page *hash_next_dev; /* next in hash chain (bydev) */ + struct cached_page *hash_next_ino; /* next in hash chain (byino) */ +}; + diff --git a/servers/vm/exit.c b/servers/vm/exit.c index e6da3b6d9..f287b327a 100644 --- a/servers/vm/exit.c +++ b/servers/vm/exit.c @@ -29,21 +29,20 @@ void free_proc(struct vmproc *vmp) map_free_proc(vmp); pt_free(&vmp->vm_pt); region_init(&vmp->vm_regions_avl); - vmp->vm_region_top = 0; #if VMSTATS vmp->vm_bytecopies = 0; #endif + vmp->vm_region_top = 0; } void clear_proc(struct vmproc *vmp) { region_init(&vmp->vm_regions_avl); - vmp->vm_region_top = 0; - vmp->vm_callback = NULL; /* No pending vfs callback. */ vmp->vm_flags = 0; /* Clear INUSE, so slot is free. */ #if VMSTATS vmp->vm_bytecopies = 0; #endif + vmp->vm_region_top = 0; } /*===========================================================================* @@ -61,6 +60,7 @@ SANITYCHECK(SCL_FUNCTIONS); return EINVAL; } vmp = &vmproc[proc]; + if(!(vmp->vm_flags & VMF_EXITING)) { printf("VM: unannounced VM_EXIT %d\n", msg->VME_ENDPOINT); return EINVAL; diff --git a/servers/vm/fork.c b/servers/vm/fork.c index 39dcd9d8b..877ec7f7b 100644 --- a/servers/vm/fork.c +++ b/servers/vm/fork.c @@ -102,10 +102,10 @@ int do_fork(message *msg) * and its return value needn't be checked. 
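 * (The NULL callback argument makes handle_memory() resolve these
 * pages synchronously, with no VFS continuation.)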
*/ vir = msgaddr; - if (handle_memory(vmc, vir, sizeof(message), 1) != OK) + if (handle_memory(vmc, vir, sizeof(message), 1, NULL, 0, 0) != OK) panic("do_fork: handle_memory for child failed\n"); vir = msgaddr; - if (handle_memory(vmp, vir, sizeof(message), 1) != OK) + if (handle_memory(vmp, vir, sizeof(message), 1, NULL, 0, 0) != OK) panic("do_fork: handle_memory for parent failed\n"); } diff --git a/servers/vm/glo.h b/servers/vm/glo.h index 9ef20fcaa..4a5e77fc3 100644 --- a/servers/vm/glo.h +++ b/servers/vm/glo.h @@ -23,15 +23,18 @@ EXTERN kinfo_t kernel_boot_info; #if SANITYCHECKS EXTERN int nocheck; EXTERN int incheck; -EXTERN long vm_sanitychecklevel; EXTERN int sc_lastline; EXTERN char *sc_lastfile; #endif +extern struct minix_kerninfo *_minix_kerninfo; + /* mem types */ EXTERN mem_type_t mem_type_anon, /* anonymous memory */ mem_type_directphys, /* direct physical mapping memory */ mem_type_anon_contig, /* physically contig anon memory */ + mem_type_cache, /* disk cache */ + mem_type_mappedfile, /* memory with file contents */ mem_type_shared; /* memory shared by multiple processes */ /* total number of memory pages */ diff --git a/servers/vm/main.c b/servers/vm/main.c index 884218bb0..9cd14b444 100644 --- a/servers/vm/main.c +++ b/servers/vm/main.c @@ -90,6 +90,7 @@ int main(void) /* This is VM's main loop. */ while (TRUE) { int r, c; + u32_t type, param; SANITYCHECK(SCL_TOP); if(missing_spares > 0) { @@ -107,7 +108,11 @@ int main(void) who_e = msg.m_source; if(vm_isokendpt(who_e, &caller_slot) != OK) panic("invalid caller %d", who_e); - c = CALLNUMBER(msg.m_type); + + type = param = msg.m_type; + type &= 0x0000FFFF; + param >>= 16; + c = CALLNUMBER(type); result = ENOSYS; /* Out of range or restricted calls return this. */ if(msg.m_type == RS_INIT && msg.m_source == RS_PROC_NR) { @@ -118,7 +123,6 @@ int main(void) "message!\n", msg.m_source); } do_pagefaults(&msg); - pt_clearmapcache(); /* * do not reply to this call, the caller is unblocked by * a sys_vmctl() call in do_pagefaults if success. VM panics @@ -322,10 +326,6 @@ void init_vm(void) assert(kernel_boot_info.mmap_size > 0); assert(kernel_boot_info.mods_with_kernel > 0); -#if SANITYCHECKS - env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX); -#endif - /* Get chunks of available memory. */ get_mem_chunks(mem_chunks); @@ -431,6 +431,10 @@ void init_vm(void) CALLMAP(VM_QUERY_EXIT, do_query_exit); CALLMAP(VM_WATCH_EXIT, do_watch_exit); + /* Cache blocks. */ + CALLMAP(VM_MAPCACHEPAGE, do_mapcache); + CALLMAP(VM_SETCACHEPAGE, do_setcache); + /* Initialize the structures for queryexit */ init_query_exit(); diff --git a/servers/vm/mem_anon.c b/servers/vm/mem_anon.c index 55d30bc23..acd346d3c 100644 --- a/servers/vm/mem_anon.c +++ b/servers/vm/mem_anon.c @@ -17,10 +17,12 @@ * pointers. 
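 * Anonymous regions support splitting and low-end shrinking trivially
 * (both are no-ops) and resolve copy-on-write faults via mem_cow().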
*/ -static int anon_reference(struct phys_region *pr); +static void anon_split(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2); +static int anon_lowshrink(struct vir_region *vr, vir_bytes len); static int anon_unreference(struct phys_region *pr); static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int anon_sanitycheck(struct phys_region *pr, char *file, int line); static int anon_writable(struct phys_region *pr); static int anon_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); @@ -29,21 +31,17 @@ static int anon_refcount(struct vir_region *vr); struct mem_type mem_type_anon = { .name = "anonymous memory", - .ev_reference = anon_reference, .ev_unreference = anon_unreference, .ev_pagefault = anon_pagefault, .ev_resize = anon_resize, .ev_sanitycheck = anon_sanitycheck, + .ev_lowshrink = anon_lowshrink, + .ev_split = anon_split, .regionid = anon_regionid, .writable = anon_writable, .refcount = anon_refcount }; -static int anon_reference(struct phys_region *pr) -{ - return OK; -} - static int anon_unreference(struct phys_region *pr) { assert(pr->ph->refcount == 0); @@ -53,10 +51,9 @@ static int anon_unreference(struct phys_region *pr) } static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int l) { phys_bytes new_page, new_page_cl; - struct phys_block *pb; u32_t allocflags; allocflags = vrallocflags(region->flags); @@ -83,20 +80,7 @@ static int anon_pagefault(struct vmproc *vmp, struct vir_region *region, assert(region->flags & VR_WRITABLE); - if(sys_abscopy(ph->ph->phys, new_page, VM_PAGE_SIZE) != OK) { - panic("VM: abscopy failed\n"); - return EFAULT; - } - - if(!(pb = pb_new(new_page))) { - free_mem(new_page_cl, 1); - return ENOMEM; - } - - pb_unreferenced(region, ph, 0); - pb_link(ph, pb, ph->offset, region); - - return OK; + return mem_cow(region, ph, new_page_cl, new_page); } static int anon_sanitycheck(struct phys_region *pr, char *file, int line) @@ -137,8 +121,18 @@ static u32_t anon_regionid(struct vir_region *region) return region->id; } +static int anon_lowshrink(struct vir_region *vr, vir_bytes len) +{ + return OK; +} + static int anon_refcount(struct vir_region *vr) { return 1 + vr->remaps; } +static void anon_split(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2) +{ + return; +} diff --git a/servers/vm/mem_anon_contig.c b/servers/vm/mem_anon_contig.c index f11adc676..1f2750d18 100644 --- a/servers/vm/mem_anon_contig.c +++ b/servers/vm/mem_anon_contig.c @@ -8,10 +8,10 @@ #include "region.h" #include "glo.h" -static int anon_contig_reference(struct phys_region *pr); +static int anon_contig_reference(struct phys_region *, struct phys_region *); static int anon_contig_unreference(struct phys_region *pr); static int anon_contig_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int); static int anon_contig_sanitycheck(struct phys_region *pr, char *file, int line); static int anon_contig_writable(struct phys_region *pr); static int anon_contig_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); @@ -29,7 +29,7 @@ struct mem_type mem_type_anon_contig = { }; static int anon_contig_pagefault(struct vmproc *vmp, struct 
vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *s, int l) { panic("anon_contig_pagefault: pagefault cannot happen"); } @@ -50,7 +50,7 @@ static int anon_contig_new(struct vir_region *region) struct phys_block *pb = pb_new(MAP_NONE); struct phys_region *pr = NULL; if(pb) - pr = pb_reference(pb, p * VM_PAGE_SIZE, region); + pr = pb_reference(pb, p * VM_PAGE_SIZE, region, &mem_type_anon_contig); if(!pr) { if(pb) pb_free(pb); map_free(region); @@ -85,7 +85,8 @@ static int anon_contig_resize(struct vmproc *vmp, struct vir_region *vr, vir_byt return ENOMEM; } -static int anon_contig_reference(struct phys_region *pr) +static int anon_contig_reference(struct phys_region *pr, + struct phys_region *newpr) { printf("VM: cannot fork with physically contig memory.\n"); return ENOMEM; diff --git a/servers/vm/mem_cache.c b/servers/vm/mem_cache.c new file mode 100644 index 000000000..b5fd758c8 --- /dev/null +++ b/servers/vm/mem_cache.c @@ -0,0 +1,228 @@ + +/* This file implements the disk cache. + * + * If they exist anywhere, cached pages are always in a private + * VM datastructure. + * + * They might also be any combination of: + * - be mapped in by a filesystem for reading/writing by it + * - be mapped in by a process as the result of an mmap call (future) + * + * This file manages the datastructure of all cache blocks, and + * mapping them in and out of filesystems. + */ + +#include +#include + +#include + +#include "proto.h" +#include "vm.h" +#include "region.h" +#include "glo.h" +#include "cache.h" + +static int cache_reference(struct phys_region *pr, struct phys_region *pr2); +static int cache_unreference(struct phys_region *pr); +static int cache_sanitycheck(struct phys_region *pr, char *file, int line); +static int cache_writable(struct phys_region *pr); +static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l); +static int cache_pagefault(struct vmproc *vmp, struct vir_region *region, + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); + +struct mem_type mem_type_cache = { + .name = "cache memory", + .ev_reference = cache_reference, + .ev_unreference = cache_unreference, + .ev_resize = cache_resize, + .ev_sanitycheck = cache_sanitycheck, + .ev_pagefault = cache_pagefault, + .writable = cache_writable, +}; + +static int cache_reference(struct phys_region *pr, struct phys_region *pr2) +{ + return OK; +} + +static int cache_unreference(struct phys_region *pr) +{ + return mem_type_anon.ev_unreference(pr); +} + +static int cache_sanitycheck(struct phys_region *pr, char *file, int line) +{ + MYASSERT(usedpages_add(pr->ph->phys, VM_PAGE_SIZE) == OK); + return OK; +} + +static int cache_writable(struct phys_region *pr) +{ + /* Cache blocks are at the moment only used by filesystems so always writable. 
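 * The one exception is a page that is not yet present (its physical
 * address is still MAP_NONE); cache_pagefault() below links in the
 * actual cached page on first access.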
*/ + assert(pr->ph->refcount > 0); + return pr->ph->phys != MAP_NONE; +} + +static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l) +{ + printf("VM: cannot resize cache blocks.\n"); + return ENOMEM; +} + +int +do_mapcache(message *msg) +{ + dev_t dev = msg->m_u.m_vmmcp.dev; + u64_t dev_off = (u64_t) msg->m_u.m_vmmcp.dev_offset_pages * VM_PAGE_SIZE; + u64_t ino_off = (u64_t) msg->m_u.m_vmmcp.ino_offset_pages * VM_PAGE_SIZE; + int n; + int bytes = msg->m_u.m_vmmcp.pages * VM_PAGE_SIZE; + struct vir_region *vr; + struct vmproc *caller; + vir_bytes offset; + + if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source"); + caller = &vmproc[n]; + + if(bytes < VM_PAGE_SIZE) return EINVAL; + + if(!(vr = map_page_region(caller, VM_PAGE_SIZE, VM_DATATOP, bytes, + VR_ANON | VR_WRITABLE, 0, &mem_type_cache))) { + printf("VM: map_page_region failed\n"); + return ENOMEM; + } + + assert(vr->length == bytes); + + for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) { + struct cached_page *hb; + + assert(vr->length == bytes); + assert(offset < vr->length); + + if(!(hb = find_cached_page_bydev(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, 1))) { + map_unmap_region(caller, vr, 0, bytes); + return ENOENT; + } + + assert(!vr->param.pb_cache); + vr->param.pb_cache = hb->page; + + assert(vr->length == bytes); + assert(offset < vr->length); + + if(map_pf(caller, vr, offset, 1, NULL, NULL, 0) != OK) { + map_unmap_region(caller, vr, 0, bytes); + printf("VM: map_pf failed\n"); + return ENOMEM; + } + + assert(!vr->param.pb_cache); + } + + memset(msg, 0, sizeof(*msg)); + + msg->m_u.m_vmmcp_reply.addr = (void *) vr->vaddr; + + assert(vr); + +#if CACHE_SANITY + cache_sanitycheck_internal(); +#endif + + return OK; +} + +static int cache_pagefault(struct vmproc *vmp, struct vir_region *region, + struct phys_region *ph, int write, vfs_callback_t cb, + void *state, int len) +{ + vir_bytes offset = ph->offset; + assert(ph->ph->phys == MAP_NONE); + assert(region->param.pb_cache); + pb_unreferenced(region, ph, 0); + pb_link(ph, region->param.pb_cache, offset, region); + region->param.pb_cache = NULL; + + return OK; +} + +int +do_setcache(message *msg) +{ + int r; + dev_t dev = msg->m_u.m_vmmcp.dev; + u64_t dev_off = (u64_t) msg->m_u.m_vmmcp.dev_offset_pages * VM_PAGE_SIZE; + u64_t ino_off = (u64_t) msg->m_u.m_vmmcp.ino_offset_pages * VM_PAGE_SIZE; + int n; + struct vmproc *caller; + vir_bytes offset; + int bytes = msg->m_u.m_vmmcp.pages * VM_PAGE_SIZE; + + if(bytes < VM_PAGE_SIZE) return EINVAL; + + if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source"); + caller = &vmproc[n]; + + for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) { + struct vir_region *region; + struct phys_region *phys_region = NULL; + vir_bytes v = (vir_bytes) msg->m_u.m_vmmcp.block + offset; + struct cached_page *hb; + + if(!(region = map_lookup(caller, v, &phys_region))) { + printf("VM: error: no reasonable memory region given (offset 0x%lx, 0x%lx)\n", offset, v); + return EFAULT; + } + + if(!phys_region) { + printf("VM: error: no available memory region given\n"); + return EFAULT; + } + + if((hb=find_cached_page_bydev(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, 1))) { + /* block inode info updated */ + if(hb->page != phys_region->ph) { + /* previous cache entry has become + * obsolete; make a new one. rmcache + * removes it from the cache and frees + * the page if it isn't mapped in anywhere + * else. 
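 * Control then falls through to the checks and addcache() below,
 * which re-insert the caller's page under the same (dev, ino) keys.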
+ */ + rmcache(hb); + } else { + /* block was already there, inode info might've changed which is fine */ + continue; + } + } + + if(phys_region->memtype != &mem_type_anon && + phys_region->memtype != &mem_type_anon_contig) { + printf("VM: error: no reasonable memory type\n"); + return EFAULT; + } + + if(phys_region->ph->refcount != 1) { + printf("VM: error: no reasonable refcount\n"); + return EFAULT; + } + + phys_region->memtype = &mem_type_cache; + + if((r=addcache(dev, dev_off + offset, + msg->m_u.m_vmmcp.ino, ino_off + offset, phys_region->ph)) != OK) { + printf("VM: addcache failed\n"); + return r; + } + } + +#if CACHE_SANITY + cache_sanitycheck_internal(); +#endif + + return OK; +} + diff --git a/servers/vm/mem_directphys.c b/servers/vm/mem_directphys.c index 391f911a0..740c6e488 100644 --- a/servers/vm/mem_directphys.c +++ b/servers/vm/mem_directphys.c @@ -14,35 +14,27 @@ * pointers. */ -static int phys_reference(struct phys_region *pr); static int phys_unreference(struct phys_region *pr); static int phys_writable(struct phys_region *pr); static int phys_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int phys_copy(struct vir_region *vr, struct vir_region *newvr); struct mem_type mem_type_directphys = { .name = "physical memory mapping", - .ev_reference = phys_reference, .ev_copy = phys_copy, .ev_unreference = phys_unreference, .writable = phys_writable, .ev_pagefault = phys_pagefault }; -static int phys_reference(struct phys_region *pr) -{ - panic("%s", __FUNCTION__); - return OK; -} - static int phys_unreference(struct phys_region *pr) { return OK; } static int phys_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, void *st, int len) { phys_bytes arg = region->param.phys, phmem; assert(arg != MAP_NONE); diff --git a/servers/vm/mem_shared.c b/servers/vm/mem_shared.c index a8349b69b..b7fdd4944 100644 --- a/servers/vm/mem_shared.c +++ b/servers/vm/mem_shared.c @@ -13,10 +13,9 @@ * pointers. 
*/ -static int shared_reference(struct phys_region *pr); static int shared_unreference(struct phys_region *pr); static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); static int shared_sanitycheck(struct phys_region *pr, char *file, int line); static int shared_writable(struct phys_region *pr); static void shared_delete(struct vir_region *region); @@ -26,7 +25,6 @@ static int shared_refcount(struct vir_region *vr); struct mem_type mem_type_shared = { .name = "shared memory", - .ev_reference = shared_reference, .ev_copy = shared_copy, .ev_unreference = shared_unreference, .ev_pagefault = shared_pagefault, @@ -37,11 +35,6 @@ struct mem_type mem_type_shared = { .writable = shared_writable }; -static int shared_reference(struct phys_region *pr) -{ - return OK; -} - static int shared_unreference(struct phys_region *pr) { return mem_type_anon.ev_unreference(pr); @@ -116,7 +109,8 @@ static void shared_delete(struct vir_region *region) } static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write) + struct phys_region *ph, int write, vfs_callback_t cb, + void *state, int statelen) { struct vir_region *src_region; struct vmproc *src_vmp; @@ -131,7 +125,8 @@ static int shared_pagefault(struct vmproc *vmp, struct vir_region *region, if(!(pr = physblock_get(src_region, ph->offset))) { int r; - if((r=map_pf(src_vmp, src_region, ph->offset, write)) != OK) + if((r=map_pf(src_vmp, src_region, ph->offset, write, + NULL, NULL, 0)) != OK) return r; if(!(pr = physblock_get(src_region, ph->offset))) { panic("missing region after pagefault handling"); diff --git a/servers/vm/memtype.h b/servers/vm/memtype.h index b0fff8e41..7ff4ce9c6 100644 --- a/servers/vm/memtype.h +++ b/servers/vm/memtype.h @@ -6,18 +6,24 @@ struct vmproc; struct vir_region; struct phys_region; +typedef void (*vfs_callback_t)(struct vmproc *vmp, message *m, + void *, void *); + typedef struct mem_type { char *name; /* human-readable name */ int (*ev_new)(struct vir_region *region); void (*ev_delete)(struct vir_region *region); - int (*ev_reference)(struct phys_region *pr); + int (*ev_reference)(struct phys_region *pr, struct phys_region *newpr); int (*ev_unreference)(struct phys_region *pr); int (*ev_pagefault)(struct vmproc *vmp, struct vir_region *region, - struct phys_region *ph, int write); + struct phys_region *ph, int write, vfs_callback_t cb, void *, int); int (*ev_resize)(struct vmproc *vmp, struct vir_region *vr, vir_bytes len); + void (*ev_split)(struct vmproc *vmp, struct vir_region *vr, + struct vir_region *r1, struct vir_region *r2); int (*writable)(struct phys_region *pr); int (*ev_sanitycheck)(struct phys_region *pr, char *file, int line); int (*ev_copy)(struct vir_region *vr, struct vir_region *newvr); + int (*ev_lowshrink)(struct vir_region *vr, vir_bytes len); u32_t (*regionid)(struct vir_region *vr); int (*refcount)(struct vir_region *vr); } mem_type_t; diff --git a/servers/vm/mmap.c b/servers/vm/mmap.c index 1cdc677a0..4340f94b5 100644 --- a/servers/vm/mmap.c +++ b/servers/vm/mmap.c @@ -56,8 +56,6 @@ static struct vir_region *mmap_region(struct vmproc *vmp, vir_bytes addr, if(len % VM_PAGE_SIZE) len += VM_PAGE_SIZE - (len % VM_PAGE_SIZE); -#if 0 - /* MAP_FIXED is restored in a later commit */ if (addr && (vmm_flags & MAP_FIXED)) { int r = map_unmap_range(vmp, addr, len); if(r != OK) { @@ -65,7 +63,6 @@ static struct vir_region *mmap_region(struct 
vmproc *vmp, vir_bytes addr, return NULL; } } -#endif if (addr || (vmm_flags & MAP_FIXED)) { /* An address is given, first try at that address. */ @@ -359,8 +356,7 @@ int do_munmap(message *m) { int r, n; struct vmproc *vmp; - vir_bytes addr, len, offset; - struct vir_region *vr; + vir_bytes addr, len; endpoint_t target = SELF; if(m->m_type == VM_UNMAP_PHYS) { @@ -384,30 +380,20 @@ int do_munmap(message *m) addr = (vir_bytes) m->VMUN_ADDR; } else addr = (vir_bytes) m->VMUM_ADDR; - if(!(vr = map_lookup(vmp, addr, NULL))) { - printf("VM: unmap: virtual address 0x%lx not found in %d\n", - addr, target); - return EFAULT; - } - if(addr % VM_PAGE_SIZE) return EFAULT; if(m->m_type == VM_UNMAP_PHYS || m->m_type == VM_SHM_UNMAP) { + struct vir_region *vr; + if(!(vr = map_lookup(vmp, addr, NULL))) { + printf("VM: unmap: address 0x%lx not found in %d\n", + addr, target); + sys_sysctl_stacktrace(target); + return EFAULT; + } len = vr->length; } else len = roundup(m->VMUM_LEN, VM_PAGE_SIZE); - offset = addr - vr->vaddr; - - if(offset + len > vr->length) { - printf("munmap: addr 0x%lx len 0x%lx spills out of region\n", - addr, len); - return EFAULT; - } - - if(map_unmap_region(vmp, vr, offset, len) != OK) - panic("do_munmap: map_unmap_region failed"); - - return OK; + return map_unmap_range(vmp, addr, len); } diff --git a/servers/vm/pagefaults.c b/servers/vm/pagefaults.c index acc497562..46c254777 100644 --- a/servers/vm/pagefaults.c +++ b/servers/vm/pagefaults.c @@ -45,23 +45,34 @@ char *pf_errstr(u32_t err) return buf; } -/*===========================================================================* - * do_pagefaults * - *===========================================================================*/ -void do_pagefaults(message *m) -{ - endpoint_t ep = m->m_source; - u32_t addr = m->VPF_ADDR; - u32_t err = m->VPF_FLAGS; - struct vmproc *vmp; - int s; +struct pf_state { + endpoint_t ep; + vir_bytes vaddr; + u32_t err; +}; +struct hm_state { + endpoint_t requestor; + struct vmproc *vmp; + vir_bytes mem; + vir_bytes len; + int wrflag; +}; + +static void pf_cont(struct vmproc *vmp, message *m, void *arg, void *statearg); + +static void hm_cont(struct vmproc *vmp, message *m, void *arg, void *statearg); + +static void handle_pagefault(endpoint_t ep, vir_bytes addr, u32_t err, int retry) +{ + struct vmproc *vmp; + int s, result; struct vir_region *region; vir_bytes offset; int p, wr = PFERR_WRITE(err); if(vm_isokendpt(ep, &p) != OK) - panic("do_pagefaults: endpoint wrong: %d", ep); + panic("handle_pagefault: endpoint wrong: %d", ep); vmp = &vmproc[p]; assert(vmp->vm_flags & VMF_INUSE); @@ -69,11 +80,11 @@ void do_pagefaults(message *m) /* See if address is valid at all. */ if(!(region = map_lookup(vmp, addr, NULL))) { if(PFERR_PROT(err)) { - printf("VM: pagefault: SIGSEGV %d protected addr 0x%x; %s\n", + printf("VM: pagefault: SIGSEGV %d protected addr 0x%lx; %s\n", ep, addr, pf_errstr(err)); } else { assert(PFERR_NOPAGE(err)); - printf("VM: pagefault: SIGSEGV %d bad addr 0x%x; %s\n", + printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx; %s\n", ep, addr, pf_errstr(err)); sys_sysctl_stacktrace(ep); } @@ -86,7 +97,7 @@ void do_pagefaults(message *m) /* If process was writing, see if it's writable. 
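 * (Region-level protection is absolute: copy-on-write is handled later
 * by the per-page memtype fault handler, so a write to a region that
 * lacks VR_WRITABLE is always a SIGSEGV.)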
 */
 	if(!(region->flags & VR_WRITABLE) && wr) {
-		printf("VM: pagefault: SIGSEGV %d ro map 0x%x %s\n",
+		printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
 			ep, addr, pf_errstr(err));
 		if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
 			panic("sys_kill failed: %d", s);
@@ -99,20 +110,75 @@
 	offset = addr - region->vaddr;
 
 	/* Access is allowed; handle it. */
-	if((map_pf(vmp, region, offset, wr)) != OK) {
+	if(retry) {
+		result = map_pf(vmp, region, offset, wr, NULL, NULL, 0);
+		assert(result != SUSPEND);
+	} else {
+		struct pf_state state;
+		state.ep = ep;
+		state.vaddr = addr;
+		state.err = err;
+		result = map_pf(vmp, region, offset, wr, pf_cont,
+			&state, sizeof(state));
+	}
+
+	if(result == SUSPEND) {
+		return;
+	}
+
+	if(result != OK) {
 		printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
-		if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
+		if((s=sys_kill(ep, SIGSEGV)) != OK)
 			panic("sys_kill failed: %d", s);
 		if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
 			panic("do_pagefaults: sys_vmctl failed: %d", ep);
 		return;
 	}
 
+	pt_clearmapcache();
+
 	/* Pagefault is handled, so now reactivate the process. */
 	if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
 		panic("do_pagefaults: sys_vmctl failed: %d", ep);
 }
+
+static void pf_cont(struct vmproc *vmp, message *m,
+	void *arg, void *statearg)
+{
+	struct pf_state *state = statearg;
+	handle_pagefault(state->ep, state->vaddr, state->err, 1);
+}
+
+static void hm_cont(struct vmproc *vmp, message *m,
+	void *arg, void *statearg)
+{
+	int r;
+	struct hm_state *state = statearg;
+	printf("hm_cont: result %d\n", m->VMV_RESULT);
+	r = handle_memory(vmp, state->mem, state->len, state->wrflag,
+		hm_cont, state, sizeof(*state));
+	if(r == SUSPEND) {
+		printf("VM: hm_cont: unexpected SUSPEND on retry\n");
+		return;
+	}
+
+	printf("VM: hm_cont: ok, result %d, requestor %d\n", r, state->requestor);
+
+	if(sys_vmctl(state->requestor, VMCTL_MEMREQ_REPLY, r) != OK)
+		panic("hm_cont: sys_vmctl failed: %d", r);
+
+	printf("MEMREQ_REPLY sent\n");
+}
+
+/*===========================================================================*
+ *				do_pagefaults				     *
+ *===========================================================================*/
+void do_pagefaults(message *m)
+{
+	handle_pagefault(m->m_source, m->VPF_ADDR, m->VPF_FLAGS, 0);
+}
+
 /*===========================================================================*
  *				   do_memory				     *
  *===========================================================================*/
@@ -132,25 +198,43 @@ void do_memory(void)
 	switch(r) {
 		case VMPTYPE_CHECK:
+		{
+			struct hm_state state;
+
 			if(vm_isokendpt(who, &p) != OK)
 				panic("do_memory: bad endpoint: %d", who);
 			vmp = &vmproc[p];
-			r = handle_memory(vmp, mem, len, wrflag);
+
+			state.vmp = vmp;
+			state.mem = mem;
+			state.len = len;
+			state.wrflag = wrflag;
+			state.requestor = requestor;
+
+			r = handle_memory(vmp, mem, len,
+				wrflag, hm_cont, &state, sizeof(state));
+			break;
+		}
+
 		default:
 			return;
 	}
 
-	if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK)
+	if(r != SUSPEND) {
+		if(sys_vmctl(requestor, VMCTL_MEMREQ_REPLY, r) != OK)
 			panic("do_memory: sys_vmctl failed: %d", r);
+	}
 	}
 }
 
-int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag)
+int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag,
+	vfs_callback_t callback, void *state, int statelen)
 {
 	struct vir_region *region;
 	vir_bytes o;
+	struct hm_state *hmstate = (struct hm_state *) state;
 
 	/* Page-align memory and length.
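 	 * The requested range may start and end anywhere, so it is widened
 	 * to whole pages before the per-region loop below walks it in
 	 * VM_PAGE_SIZE steps. Minimal sketch of that alignment; the first
 	 * statement is the one that follows this comment, the rest is the
 	 * assumed continuation:
 	 *
 	 *	o = mem % VM_PAGE_SIZE;			// offset into first page
 	 *	mem -= o;				// round start down
 	 *	len += o;				// cover the skipped bytes
 	 *	len = roundup(len, VM_PAGE_SIZE);	// round end up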
 */
 	o = mem % VM_PAGE_SIZE;
@@ -181,8 +265,14 @@ int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int wrflag)
 		if(offset + sublen > region->length)
 			sublen = region->length - offset;
 
-		r = map_handle_memory(vmp, region, offset,
-			sublen, wrflag);
+		if(hmstate && hmstate->requestor == VFS_PROC_NR
+		   && region->def_memtype == &mem_type_mappedfile) {
+			r = map_handle_memory(vmp, region, offset,
+				sublen, wrflag, NULL, NULL, 0);
+		} else {
+			r = map_handle_memory(vmp, region, offset,
+				sublen, wrflag, callback, state, statelen);
+		}
 
 		len -= sublen;
 		mem += sublen;
diff --git a/servers/vm/pb.c b/servers/vm/pb.c
index 908840485..2a024f839 100644
--- a/servers/vm/pb.c
+++ b/servers/vm/pb.c
@@ -45,6 +45,7 @@
 USE(newpb,
 	newpb->phys = phys;
 	newpb->refcount = 0;
 	newpb->firstregion = NULL;
+	newpb->flags = 0;
 	);
 
 	return newpb;
@@ -65,13 +66,12 @@
 USE(newphysr,
 	newphysr->ph = newpb;
 	newphysr->parent = parent;
 	newphysr->next_ph_list = newpb->firstregion;
-	newphysr->memtype = parent->def_memtype;
 	newpb->firstregion = newphysr;);
 	newpb->refcount++;
 }
 
 struct phys_region *pb_reference(struct phys_block *newpb,
-	vir_bytes offset, struct vir_region *region)
+	vir_bytes offset, struct vir_region *region, mem_type_t *memtype)
 {
 	struct phys_region *newphysr;
 
@@ -80,6 +80,8 @@ struct phys_region *pb_reference(struct phys_block *newpb,
 		return NULL;
 	}
 
+	newphysr->memtype = memtype;
+
 	/* New physical region. */
 	pb_link(newphysr, newpb, offset, region);
 
@@ -120,7 +122,7 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm)
 	if(pb->refcount == 0) {
 		assert(!pb->firstregion);
 		int r;
-		if((r = region->def_memtype->ev_unreference(pr)) != OK)
+		if((r = pr->memtype->ev_unreference(pr)) != OK)
 			panic("unref failed, %d", r);
 
 		SLABFREE(pb);
@@ -130,3 +132,37 @@ void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm)
 	if(rm) physblock_set(region, pr->offset, NULL);
 }
+
+int mem_cow(struct vir_region *region,
+	struct phys_region *ph, phys_bytes new_page_cl, phys_bytes new_page)
+{
+	struct phys_block *pb;
+
+	if(new_page == MAP_NONE) {
+		u32_t allocflags;
+		allocflags = vrallocflags(region->flags);
+
+		if((new_page_cl = alloc_mem(1, allocflags)) == NO_MEM)
+			return ENOMEM;
+
+		new_page = CLICK2ABS(new_page_cl);
+	}
+
+	assert(ph->ph->phys != MAP_NONE);
+
+	if(sys_abscopy(ph->ph->phys, new_page, VM_PAGE_SIZE) != OK) {
+		panic("VM: abscopy failed\n");
+		return EFAULT;
+	}
+
+	if(!(pb = pb_new(new_page))) {
+		free_mem(new_page_cl, 1);
+		return ENOMEM;
+	}
+
+	pb_unreferenced(region, ph, 0);
+	pb_link(ph, pb, ph->offset, region);
+	ph->memtype = &mem_type_anon;
+
+	return OK;
+}
diff --git a/servers/vm/proto.h b/servers/vm/proto.h
index 6941b0b8a..2d5018a50 100644
--- a/servers/vm/proto.h
+++ b/servers/vm/proto.h
@@ -70,13 +70,14 @@
 int do_unmap_phys(message *msg);
 int do_remap(message *m);
 int do_get_phys(message *m);
 int do_get_refcount(message *m);
+int do_vfs_mmap(message *m);
 
 /* pagefaults.c */
 void do_pagefaults(message *m);
 void do_memory(void);
 char *pf_errstr(u32_t err);
 int handle_memory(struct vmproc *vmp, vir_bytes mem, vir_bytes len, int
-	wrflag);
+	wrflag, vfs_callback_t cb, void *state, int statelen);
 
 /* $(ARCH)/pagetable.c */
 void pt_init();
@@ -103,6 +104,7 @@
 int pt_mapkernel(pt_t *pt);
 void vm_pagelock(void *vir, int lockflag);
 int vm_addrok(void *vir, int write);
 int get_vm_self_pages(void);
+int pt_writable(struct vmproc *vmp, vir_bytes v);
 
 #if SANITYCHECKS
 void pt_sanitycheck(pt_t *pt, char *file, int line);
@@ -133,6 +135,7 @@ int map_region_extend(struct vmproc *vmp, struct vir_region *vr,
 int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes vir);
 int map_unmap_region(struct vmproc *vmp, struct vir_region *vr,
 	vir_bytes offset, vir_bytes len);
+int map_unmap_range(struct vmproc *vmp, vir_bytes, vir_bytes);
 int map_free_proc(struct vmproc *vmp);
 int map_proc_copy(struct vmproc *dst, struct vmproc *src);
 int map_proc_copy_from(struct vmproc *dst, struct vmproc *src, struct
@@ -140,10 +143,11 @@ int map_proc_copy_from(struct vmproc *dst, struct vmproc *src, struct
 struct vir_region *map_lookup(struct vmproc *vmp, vir_bytes addr,
 	struct phys_region **pr);
 int map_pf(struct vmproc *vmp, struct vir_region *region, vir_bytes
-	offset, int write);
+	offset, int write, vfs_callback_t pf_callback, void *state, int);
 int map_pin_memory(struct vmproc *vmp);
 int map_handle_memory(struct vmproc *vmp, struct vir_region *region,
-	vir_bytes offset, vir_bytes len, int write);
+	vir_bytes offset, vir_bytes len, int write, vfs_callback_t cb,
+	void *state, int statelen);
 void map_printmap(struct vmproc *vmp);
 int map_writept(struct vmproc *vmp);
 void printregionstats(struct vmproc *vmp);
@@ -153,6 +157,8 @@ int map_free(struct vir_region *region);
 struct phys_region *physblock_get(struct vir_region *region, vir_bytes offset);
 void physblock_set(struct vir_region *region, vir_bytes offset,
 	struct phys_region *newphysr);
+int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
+	struct phys_region *pr);
 struct vir_region * map_region_lookup_tag(struct vmproc *vmp, u32_t tag);
@@ -162,7 +168,6 @@
 int map_get_phys(struct vmproc *vmp, vir_bytes addr, phys_bytes *r);
 int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt);
 int physregions(struct vir_region *vr);
-void get_stats_info(struct vm_stats_info *vsi);
 void get_usage_info(struct vmproc *vmp, struct vm_usage_info *vui);
 void get_usage_info_kernel(struct vm_usage_info *vui);
 int get_region_info(struct vmproc *vmp, struct vm_region_info *vri, int
@@ -188,13 +193,40 @@ void init_query_exit(void);
 
 struct phys_block *pb_new(phys_bytes phys);
 void pb_free(struct phys_block *);
 struct phys_region *pb_reference(struct phys_block *newpb,
-	vir_bytes offset, struct vir_region *region);
+	vir_bytes offset, struct vir_region *region, mem_type_t *);
 void pb_unreferenced(struct vir_region *region, struct phys_region *pr, int rm);
 void pb_link(struct phys_region *newphysr, struct phys_block *newpb,
 	vir_bytes offset, struct vir_region *parent);
+int mem_cow(struct vir_region *region,
+	struct phys_region *ph, phys_bytes new_page_cl, phys_bytes new_page);
 
 /* mem_directphys.c */
 void phys_setphys(struct vir_region *vr, phys_bytes startaddr);
 
 /* mem_shared.c */
 void shared_setsource(struct vir_region *vr, endpoint_t ep,
 	struct vir_region *src);
+
+/* mem_cache.c */
+int do_mapcache(message *m);
+int do_setcache(message *m);
+
+/* cache.c */
+struct cached_page *find_cached_page_bydev(dev_t dev, u64_t dev_off,
+	ino_t ino, u64_t ino_off, int touchlru);
+struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino,
+	u64_t ino_off, int touchlru);
+int addcache(dev_t dev, u64_t def_off, ino_t ino, u64_t ino_off,
+	struct phys_block *pb);
+void cache_sanitycheck_internal(void);
+int cache_freepages(int pages);
+void get_stats_info(struct vm_stats_info *vsi);
+void cache_lru_touch(struct cached_page *hb);
+void rmcache(struct cached_page *cp);
+
+/* vfs.c */
+int vfs_request(int reqno, int fd, struct vmproc *vmp, u64_t offset,
+	u32_t len, vfs_callback_t reply_callback, void *cbarg, void *state,
+	int statelen);
+int do_vfs_reply(message *m);
+
+/* mem_file.c */
+void mappedfile_setfile(struct vir_region *region, int fd, u64_t offset,
+	dev_t dev, ino_t ino, u16_t clearend, int prefill);
diff --git a/servers/vm/region.c b/servers/vm/region.c
index a7ed1ec58..a77f415dd 100644
--- a/servers/vm/region.c
+++ b/servers/vm/region.c
@@ -30,9 +30,6 @@
 #include "memtype.h"
 #include "regionavl.h"
 
-static int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
-	struct phys_region *pr);
-
 static struct vir_region *map_copy_region(struct vmproc *vmp,
 	struct vir_region *vr);
 
@@ -45,14 +42,18 @@ void map_printregion(struct vir_region *vr)
 {
 	int i;
 	struct phys_region *ph;
 	printf("map_printmap: map_name: %s\n", vr->def_memtype->name);
-	printf("\t%lx (len 0x%lx, %lukB), %p\n",
-		vr->vaddr, vr->length, vr->length/1024, vr->def_memtype->name);
+	printf("\t%lx (len 0x%lx, %lukB), %p, %s\n",
+		vr->vaddr, vr->length, vr->length/1024,
+		vr->def_memtype->name,
+		(vr->flags & VR_WRITABLE) ? "writable" : "readonly");
 	printf("\t\tphysblocks:\n");
 	for(i = 0; i < vr->length/VM_PAGE_SIZE; i++) {
 		if(!(ph=vr->physblocks[i])) continue;
-		printf("\t\t@ %lx (refs %d): phys 0x%lx\n",
+		printf("\t\t@ %lx (refs %d): phys 0x%lx, %s\n",
 			(vr->vaddr + ph->offset),
-			ph->ph->refcount, ph->ph->phys);
+			ph->ph->refcount, ph->ph->phys,
+			pt_writable(vr->parent, vr->vaddr + ph->offset) ? "W" : "R");
+	}
 }
@@ -122,8 +123,8 @@ static struct vir_region *getnextvr(struct vir_region *vr)
 
 int pr_writable(struct vir_region *vr, struct phys_region *pr)
 {
-	assert(vr->def_memtype->writable);
-	return ((vr->flags & VR_WRITABLE) && vr->def_memtype->writable(pr));
+	assert(pr->memtype->writable);
+	return ((vr->flags & VR_WRITABLE) && pr->memtype->writable(pr));
 }
 
 #if SANITYCHECKS
@@ -196,8 +197,8 @@ void map_sanitycheck(char *file, int line)
 	ALLREGIONS(;,MYASSERT(pr->offset == voffset););
 	ALLREGIONS(;,USE(pr->ph, pr->ph->seencount++;);
 		if(pr->ph->seencount == 1) {
-			if(pr->parent->memtype->ev_sanitycheck)
-				pr->parent->memtype->ev_sanitycheck(pr, file, line);
+			if(pr->memtype->ev_sanitycheck)
+				pr->memtype->ev_sanitycheck(pr, file, line);
 		}
 	);
@@ -209,6 +210,7 @@ void map_sanitycheck(char *file, int line)
 			}
 		}
 		MYASSERT(!(vr->vaddr % VM_PAGE_SIZE));,
+		if(pr->ph->flags & PBF_INCACHE) pr->ph->seencount++;
 		if(pr->ph->refcount != pr->ph->seencount) {
 			map_printmap(vmp);
 			printf("ph in vr %p: 0x%lx refcount %u "
@@ -233,6 +235,7 @@ void map_sanitycheck(char *file, int line)
 			MYASSERT(others->ph == pr->ph);
 			n_others++;
 		}
+		if(pr->ph->flags & PBF_INCACHE) n_others++;
 		MYASSERT(pr->ph->refcount == n_others);
 	}
 	MYASSERT(pr->ph->refcount == pr->ph->seencount);
@@ -245,7 +248,7 @@ void map_sanitycheck(char *file, int line)
 /*=========================================================================*
  *				map_ph_writept				   *
  *=========================================================================*/
-static int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
+int map_ph_writept(struct vmproc *vmp, struct vir_region *vr,
 	struct phys_region *pr)
 {
 	int flags = PTF_PRESENT | PTF_USER;
@@ -490,7 +493,8 @@ mem_type_t *memtype;
 	}
 
 	if(mapflags & MF_PREALLOC) {
-		if(map_handle_memory(vmp, newregion, 0, length, 1) != OK) {
+		if(map_handle_memory(vmp, newregion, 0, length, 1,
+			NULL, 0, 0) != OK) {
 			printf("VM: map_page_region: prealloc failed\n");
 			free(newregion->physblocks);
 			USE(newregion,
@@ -657,8 +661,6 @@ u32_t vrallocflags(u32_t flags)
 		allocflags |= PAF_LOWER16MB;
 	if(flags & VR_LOWER1MB)
 		allocflags |= PAF_LOWER1MB;
-	if(flags & VR_CONTIG)
-		allocflags |= PAF_CONTIG;
 	if(!(flags & VR_UNINITIALIZED))
 		allocflags |= PAF_CLEAR;
@@ -668,11 +670,14 @@ u32_t vrallocflags(u32_t flags)
 /*===========================================================================*
  *				map_pf					     *
  *===========================================================================*/
-int map_pf(vmp, region, offset, write)
+int map_pf(vmp, region, offset, write, pf_callback, state, len)
 struct vmproc *vmp;
 struct vir_region *region;
 vir_bytes offset;
 int write;
+vfs_callback_t pf_callback;
+void *state;
+int len;
 {
 	struct phys_region *ph;
 	int r = OK;
@@ -697,7 +702,8 @@ int write;
 			return ENOMEM;
 		}
 
-		if(!(ph = pb_reference(pb, offset, region))) {
+		if(!(ph = pb_reference(pb, offset, region,
+			region->def_memtype))) {
 			printf("map_pf: pb_reference failed\n");
 			pb_free(pb);
 			return ENOMEM;
@@ -711,15 +717,14 @@ int write;
 	 * writable, nothing to do.
 	 */
 
-	assert(region->def_memtype->writable);
+	assert(ph->memtype->writable);
 
-	if(!write || !region->def_memtype->writable(ph)) {
-		assert(region->def_memtype->ev_pagefault);
+	if(!write || !ph->memtype->writable(ph)) {
+		assert(ph->memtype->ev_pagefault);
 		assert(ph->ph);
 
-		if((r = region->def_memtype->ev_pagefault(vmp,
-			region, ph, write)) == SUSPEND) {
-			panic("map_pf: memtype->ev_pagefault returned SUSPEND\n");
+		if((r = ph->memtype->ev_pagefault(vmp,
+			region, ph, write, pf_callback, state, len)) == SUSPEND) {
 			return SUSPEND;
 		}
 
@@ -755,12 +760,16 @@ int write;
 	return r;
 }
 
-int map_handle_memory(vmp, region, start_offset, length, write)
+int map_handle_memory(vmp, region, start_offset, length, write,
+	cb, state, statelen)
 struct vmproc *vmp;
 struct vir_region *region;
 vir_bytes start_offset;
 vir_bytes length;
 int write;
+vfs_callback_t cb;
+void *state;
+int statelen;
 {
 	vir_bytes offset, lim;
 	int r;
@@ -770,7 +779,8 @@ int write;
 	assert(lim > start_offset);
 
 	for(offset = start_offset; offset < lim; offset += VM_PAGE_SIZE)
-		if((r = map_pf(vmp, region, offset, write)) != OK)
+		if((r = map_pf(vmp, region, offset, write,
+			cb, state, statelen)) != OK)
 			return r;
 
 	return OK;
@@ -788,7 +798,7 @@ int map_pin_memory(struct vmproc *vmp)
 	/* Scan all memory regions.
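 	 * Pinning pre-faults every region writably so the process can never
 	 * page-fault afterwards. Sketch of the AVL iterator pattern used
 	 * below (region_start_iter()/region_get_iter()/region_incr_iter()
 	 * as used elsewhere in this file; the start key of 0 is an
 	 * assumption, not shown in this hunk):
 	 *
 	 *	region_iter iter;
 	 *	region_start_iter(&vmp->vm_regions_avl, &iter, 0,
 	 *		AVL_GREATER_EQUAL);
 	 *	while((vr = region_get_iter(&iter))) {
 	 *		map_handle_memory(vmp, vr, 0, vr->length, 1,
 	 *			NULL, 0, 0);
 	 *		region_incr_iter(&iter);
 	 *	}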
 */
 	while((vr = region_get_iter(&iter))) {
 		/* Make sure region is mapped to physical memory and writable.*/
-		r = map_handle_memory(vmp, vr, 0, vr->length, 1);
+		r = map_handle_memory(vmp, vr, 0, vr->length, 1, NULL, 0, 0);
 		if(r != OK) {
 			panic("map_pin_memory: map_handle_memory failed: %d", r);
 		}
@@ -800,7 +810,7 @@ int map_pin_memory(struct vmproc *vmp)
 /*===========================================================================*
  *				map_copy_region				     *
  *===========================================================================*/
-static struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr)
+struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region *vr)
 {
 	/* map_copy_region creates a complete copy of the vir_region
 	 * data structure, linking in the same phys_blocks directly,
@@ -829,11 +839,17 @@ static struct vir_region *map_copy_region(struct vmproc *vmp, struct vir_region
 	}
 
 	for(p = 0; p < phys_slot(vr->length); p++) {
+		struct phys_region *newph;
+
 		if(!(ph = physblock_get(vr, p*VM_PAGE_SIZE))) continue;
-		struct phys_region *newph = pb_reference(ph->ph, ph->offset, newvr);
+		newph = pb_reference(ph->ph, ph->offset, newvr,
+			vr->def_memtype);
 
 		if(!newph) { map_free(newvr); return NULL; }
 
+		if(ph->memtype->ev_reference)
+			ph->memtype->ev_reference(ph, newph);
+
 #if SANITYCHECKS
 		USE(newph, newph->written = 0;);
 		assert(physregions(vr) == cr);
@@ -994,10 +1010,10 @@ struct vir_region *start_src_vr;
 
 int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v)
 {
-	vir_bytes offset = v;
+	vir_bytes offset = v, limit, extralen;
 	struct vir_region *vr, *nextvr;
 	struct phys_region **newpr;
-	int newslots, prevslots, addedslots;
+	int newslots, prevslots, addedslots, r;
 
 	offset = roundup(offset, VM_PAGE_SIZE);
 
@@ -1008,21 +1024,15 @@ int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v)
 	if(vr->vaddr + vr->length >= v) return OK;
 
+	limit = vr->vaddr + vr->length;
+
 	assert(vr->vaddr <= offset);
 	newslots = phys_slot(offset - vr->vaddr);
 	prevslots = phys_slot(vr->length);
 	assert(newslots >= prevslots);
 	addedslots = newslots - prevslots;
-
-	if(!(newpr = realloc(vr->physblocks,
-		newslots * sizeof(struct phys_region *)))) {
-		printf("VM: map_region_extend_upto_v: realloc failed\n");
-		return ENOMEM;
-	}
-
-	vr->physblocks = newpr;
-	memset(vr->physblocks + prevslots, 0,
-		addedslots * sizeof(struct phys_region *));
+	extralen = offset - limit;
+	assert(extralen > 0);
 
 	if((nextvr = getnextvr(vr))) {
 		assert(offset <= nextvr->vaddr);
@@ -1034,11 +1044,28 @@ int map_region_extend_upto_v(struct vmproc *vmp, vir_bytes v)
 	}
 
 	if(!vr->def_memtype->ev_resize) {
-		printf("VM: can't resize this type of memory\n");
+		if(!map_page_region(vmp, limit, 0, extralen,
+			VR_WRITABLE | VR_ANON,
+			0, &mem_type_anon)) {
+			printf("resize: couldn't put anon memory there\n");
+			return ENOMEM;
+		}
+		return OK;
+	}
+
+	if(!(newpr = realloc(vr->physblocks,
+		newslots * sizeof(struct phys_region *)))) {
+		printf("VM: map_region_extend_upto_v: realloc failed\n");
 		return ENOMEM;
 	}
 
-	return vr->def_memtype->ev_resize(vmp, vr, offset - vr->vaddr);
+	vr->physblocks = newpr;
+	memset(vr->physblocks + prevslots, 0,
+		addedslots * sizeof(struct phys_region *));
+
+	r = vr->def_memtype->ev_resize(vmp, vr, offset - vr->vaddr);
+
+	return r;
 }
 
 /*========================================================================*
@@ -1066,11 +1093,27 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r,
 	map_subfree(r, offset, len);
 
 	/* if unmap was at start/end of this region, it actually shrinks */
-	if(offset == 0) {
+	if(r->length == len) {
+		/* Whole region disappears. Unlink and free it. */
+		region_remove(&vmp->vm_regions_avl, r->vaddr);
+		map_free(r);
+	} else if(offset == 0) {
 		struct phys_region *pr;
 		vir_bytes voffset;
 		int remslots;
 
+		if(!r->def_memtype->ev_lowshrink) {
+			printf("VM: low-shrinking not implemented for %s\n",
+				r->def_memtype->name);
+			return EINVAL;
+		}
+
+		if(r->def_memtype->ev_lowshrink(r, len) != OK) {
+			printf("VM: low-shrinking failed for %s\n",
+				r->def_memtype->name);
+			return EINVAL;
+		}
+
 		region_remove(&vmp->vm_regions_avl, r->vaddr);
 
 		USE(r,
@@ -1099,12 +1142,6 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r,
 		r->length -= len;
 	}
 
-	if(r->length == 0) {
-		/* Whole region disappears. Unlink and free it. */
-		region_remove(&vmp->vm_regions_avl, r->vaddr);
-		map_free(r);
-	}
-
 	SANITYCHECK(SCL_DETAIL);
 
 	if(pt_writemap(vmp, &vmp->vm_pt, regionstart,
@@ -1118,6 +1155,156 @@ int map_unmap_region(struct vmproc *vmp, struct vir_region *r,
 	return OK;
 }
 
+int split_region(struct vmproc *vmp, struct vir_region *vr,
+	struct vir_region **vr1, struct vir_region **vr2, vir_bytes split_len)
+{
+	struct vir_region *r1 = NULL, *r2 = NULL;
+	vir_bytes rem_len = vr->length - split_len;
+	int slots1, slots2;
+	vir_bytes voffset;
+	int n1 = 0, n2 = 0;
+
+	assert(!(split_len % VM_PAGE_SIZE));
+	assert(!(rem_len % VM_PAGE_SIZE));
+	assert(!(vr->vaddr % VM_PAGE_SIZE));
+	assert(!(vr->length % VM_PAGE_SIZE));
+
+	if(!vr->def_memtype->ev_split) {
+		printf("VM: split region not implemented for %s\n",
+			vr->def_memtype->name);
+		return EINVAL;
+	}
+
+	slots1 = phys_slot(split_len);
+	slots2 = phys_slot(rem_len);
+
+	if(!(r1 = region_new(vmp, vr->vaddr, split_len, vr->flags,
+		vr->def_memtype))) {
+		goto bail;
+	}
+
+	if(!(r2 = region_new(vmp, vr->vaddr+split_len, rem_len, vr->flags,
+		vr->def_memtype))) {
+		map_free(r1);
+		goto bail;
+	}
+
+	for(voffset = 0; voffset < r1->length; voffset += VM_PAGE_SIZE) {
+		struct phys_region *ph, *phn;
+		if(!(ph = physblock_get(vr, voffset))) continue;
+		if(!(phn = pb_reference(ph->ph, voffset, r1, ph->memtype)))
+			goto bail;
+		n1++;
+	}
+
+	for(voffset = 0; voffset < r2->length; voffset += VM_PAGE_SIZE) {
+		struct phys_region *ph, *phn;
+		if(!(ph = physblock_get(vr, split_len + voffset))) continue;
+		if(!(phn = pb_reference(ph->ph, voffset, r2, ph->memtype)))
+			goto bail;
+		n2++;
+	}
+
+	vr->def_memtype->ev_split(vmp, vr, r1, r2);
+
+	region_remove(&vmp->vm_regions_avl, vr->vaddr);
+	map_free(vr);
+	region_insert(&vmp->vm_regions_avl, r1);
+	region_insert(&vmp->vm_regions_avl, r2);
+
+	*vr1 = r1;
+	*vr2 = r2;
+
+	return OK;
+
+bail:
+	if(r1) map_free(r1);
+	if(r2) map_free(r2);
+
+	printf("split_region: failed\n");
+
+	return ENOMEM;
+}
+
+int map_unmap_range(struct vmproc *vmp, vir_bytes unmap_start, vir_bytes length)
+{
+	vir_bytes o = unmap_start % VM_PAGE_SIZE, unmap_limit;
+	region_iter v_iter;
+	struct vir_region *vr, *nextvr;
+
+	unmap_start -= o;
+	length += o;
+	length = roundup(length, VM_PAGE_SIZE);
+	unmap_limit = length + unmap_start;
+
+	if(length < VM_PAGE_SIZE) return EINVAL;
+	if(unmap_limit <= unmap_start) return EINVAL;
+
+	region_start_iter(&vmp->vm_regions_avl, &v_iter, unmap_start,
+		AVL_LESS_EQUAL);
+
+	if(!(vr = region_get_iter(&v_iter))) {
+		region_start_iter(&vmp->vm_regions_avl, &v_iter, unmap_start,
+			AVL_GREATER);
+		if(!(vr = region_get_iter(&v_iter))) {
+			return OK;
+		}
+	}
+
+	assert(vr);
+
+	for(; vr && vr->vaddr < unmap_limit; vr = nextvr) {
+		vir_bytes thislimit = vr->vaddr + vr->length;
+		vir_bytes this_unmap_start, this_unmap_limit;
+		vir_bytes remainlen;
+		int r;
+
+		region_incr_iter(&v_iter);
+		nextvr = region_get_iter(&v_iter);
+
+		assert(thislimit > vr->vaddr);
+
+		this_unmap_start = MAX(unmap_start, vr->vaddr);
+		this_unmap_limit = MIN(unmap_limit, thislimit);
+
+		if(this_unmap_start >= this_unmap_limit) continue;
+
+		if(this_unmap_start > vr->vaddr && this_unmap_limit < thislimit) {
+			int r;
+			struct vir_region *vr1, *vr2;
+			vir_bytes split_len = this_unmap_limit - vr->vaddr;
+			assert(split_len > 0);
+			assert(split_len < vr->length);
+			if((r=split_region(vmp, vr, &vr1, &vr2, split_len)) != OK) {
+				printf("VM: unmap split failed\n");
+				return r;
+			}
+			vr = vr1;
+			thislimit = vr->vaddr + vr->length;
+		}
+
+		remainlen = this_unmap_limit - vr->vaddr;
+
+		assert(this_unmap_start >= vr->vaddr);
+		assert(this_unmap_limit <= thislimit);
+		assert(remainlen > 0);
+
+		r = map_unmap_region(vmp, vr, this_unmap_start - vr->vaddr,
+			this_unmap_limit - this_unmap_start);
+
+		if(r != OK) {
+			printf("map_unmap_range: map_unmap_region failed\n");
+			return r;
+		}
+
+		if(nextvr) {
+			region_start_iter(&vmp->vm_regions_avl, &v_iter,
+				nextvr->vaddr, AVL_EQUAL);
+			assert(region_get_iter(&v_iter) == nextvr);
+		}
+	}
+
+	return OK;
+
+}
+
 /*========================================================================*
  *				map_get_phys				  *
  *========================================================================*/
@@ -1155,14 +1340,6 @@ int map_get_ref(struct vmproc *vmp, vir_bytes addr, u8_t *cnt)
 	return OK;
 }
 
-/*========================================================================*
- *				get_stats_info				  *
- *========================================================================*/
-void get_stats_info(struct vm_stats_info *vsi)
-{
-	vsi->vsi_cached = 0L;
-}
-
 void get_usage_info_kernel(struct vm_usage_info *vui)
 {
 	memset(vui, 0, sizeof(*vui));
@@ -1237,7 +1414,8 @@ int get_region_info(struct vmproc *vmp, struct vm_region_info *vri,
 	region_start_iter(&vmp->vm_regions_avl, &v_iter, next, AVL_GREATER_EQUAL);
 	if(!(vr = region_get_iter(&v_iter))) return 0;
 
-	for(count = 0; (vr = region_get_iter(&v_iter)) && count < max; count++, vri++) {
+	for(count = 0; (vr = region_get_iter(&v_iter)) && count < max;
+	   region_incr_iter(&v_iter)) {
 		struct phys_region *ph1 = NULL, *ph2 = NULL;
 		vir_bytes voffset;
@@ -1253,18 +1431,23 @@ int get_region_info(struct vmproc *vmp, struct vm_region_info *vri,
 			if(!ph1) ph1 = ph;
 			ph2 = ph;
 		}
 
-		if(!ph1 || !ph2) { assert(!ph1 && !ph2); continue; }
+
+		if(!ph1 || !ph2) {
+			printf("skipping empty region 0x%lx-0x%lx\n",
+				vr->vaddr, vr->vaddr+vr->length);
+			continue;
+		}
 
 		/* Report start+length of region starting from lowest use. */
 		vri->vri_addr = vr->vaddr + ph1->offset;
-		vri->vri_prot = 0;
+		vri->vri_prot = PROT_READ;
 		vri->vri_length = ph2->offset + VM_PAGE_SIZE - ph1->offset;
 
 		/* "AND" the provided protection with per-page protection.
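 		 * (After this change the protection is built up from
 		 * PROT_READ, so the "AND" is effectively an OR of PROT_WRITE
 		 * for writable regions.) Worked example with assumed numbers
 		 * and VM_PAGE_SIZE = 0x1000: a region at vaddr 0x8000 whose
 		 * only present pages sit at offsets 0x1000 and 0x3000 gets
 		 *
 		 *	vri_addr   = 0x8000 + 0x1000          = 0x9000
 		 *	vri_length = 0x3000 + 0x1000 - 0x1000 = 0x3000
 		 *
 		 * i.e. the span from the lowest present page up to just past
 		 * the highest one.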
 */
-		if (!(vr->flags & VR_WRITABLE))
-			vri->vri_prot &= ~PROT_WRITE;
-
-		region_incr_iter(&v_iter);
+		if (vr->flags & VR_WRITABLE)
+			vri->vri_prot |= PROT_WRITE;
+		count++;
+		vri++;
 	}
 
 	*nextp = next;
diff --git a/servers/vm/region.h b/servers/vm/region.h
index 6db556dc1..9d615ef03 100644
--- a/servers/vm/region.h
+++ b/servers/vm/region.h
@@ -32,6 +32,8 @@ struct phys_block {
 	u8_t flags;
 };
 
+#define PBF_INCACHE	0x01
+
 typedef struct vir_region {
 	vir_bytes vaddr;	/* virtual address, offset from pagetable */
 	vir_bytes length;	/* length in bytes */
@@ -43,12 +45,21 @@ typedef struct vir_region {
 	u32_t id;	/* unique id */
 
 	union {
-		phys_bytes phys;
+		phys_bytes phys;	/* VR_DIRECT */
 		struct {
 			endpoint_t ep;
 			vir_bytes vaddr;
 			int id;
 		} shared;
+		struct phys_block *pb_cache;
+		struct {
+			int procfd;	/* cloned fd in proc for mmap */
+			dev_t dev;
+			ino_t ino;
+			u64_t offset;
+			int inited;
+			u16_t clearend;
+		} file;
 	} param;
 
 	/* AVL fields */
@@ -61,7 +72,6 @@
 #define VR_PHYS64K	0x004	/* Physical memory must be 64k aligned. */
 #define VR_LOWER16MB	0x008
 #define VR_LOWER1MB	0x010
-#define VR_CONTIG	0x020	/* Must be physically contiguous. */
 #define VR_SHARED	0x040
 #define VR_UNINITIALIZED 0x080	/* Do not clear after allocation */
diff --git a/servers/vm/sanitycheck.h b/servers/vm/sanitycheck.h
index 1528a3ec1..a2993ce5f 100644
--- a/servers/vm/sanitycheck.h
+++ b/servers/vm/sanitycheck.h
@@ -16,10 +16,10 @@
 	printf("VM:%s:%d: %s failed (last sanity check %s:%d)\n", \
 		file, line, #c, sc_lastfile, sc_lastline); \
 	panic("sanity check failed"); } } while(0)
 
-#define SLABSANITYCHECK(l) if((l) <= vm_sanitychecklevel) { \
+#define SLABSANITYCHECK(l) if(_minix_kerninfo && 0) { \
 	slab_sanitycheck(__FILE__, __LINE__); }
 
-#define SANITYCHECK(l) if(!nocheck && ((l) <= vm_sanitychecklevel)) { \
+#define SANITYCHECK(l) if(!nocheck && _minix_kerninfo && 0) { \
 	struct vmproc *vmpr;	\
 	assert(incheck == 0);	\
 	incheck = 1;		\
diff --git a/servers/vm/slaballoc.c b/servers/vm/slaballoc.c
index b7dd8bc02..c27d07128 100644
--- a/servers/vm/slaballoc.c
+++ b/servers/vm/slaballoc.c
@@ -28,7 +28,7 @@
 #include "util.h"
 #include "sanitycheck.h"
 
-#define SLABSIZES 60
+#define SLABSIZES 200
 
 #define ITEMSPERPAGE(bytes) (DATABYTES / (bytes))
diff --git a/servers/vm/utility.c b/servers/vm/utility.c
index 4c835b85f..87aad1c35 100644
--- a/servers/vm/utility.c
+++ b/servers/vm/utility.c
@@ -161,7 +161,7 @@ int do_info(message *m)
 	 * deadlock. Note that no memory mapping can be undone without the
 	 * involvement of VM, so we are safe until we're done.
 	 */
-	r = handle_memory(vmp, ptr, size, 1 /*wrflag*/);
+	r = handle_memory(vmp, ptr, size, 1 /*wrflag*/, NULL, NULL, 0);
 	if (r != OK) return r;
 
 	/* Now that we know the copy out will succeed, perform the actual copy
diff --git a/servers/vm/vm.h b/servers/vm/vm.h
index b6d91f6fa..97bd5e3a5 100644
--- a/servers/vm/vm.h
+++ b/servers/vm/vm.h
@@ -6,6 +6,7 @@
 /* Compile in asserts and custom sanity checks at all? */
 #define SANITYCHECKS	0
+#define CACHE_SANITY	0
 #define VMSTATS		0
 
 /* VM behaviour */
diff --git a/servers/vm/vmproc.h b/servers/vm/vmproc.h
index fd28f831f..5e7145505 100644
--- a/servers/vm/vmproc.h
+++ b/servers/vm/vmproc.h
@@ -11,8 +11,6 @@
 struct vmproc;
 
-typedef void (*callback_t)(struct vmproc *who, message *m);
-
 struct vmproc {
 	int vm_flags;
 	endpoint_t vm_endpoint;
@@ -22,22 +20,8 @@ struct vmproc {
 	/* Regions in virtual address space.
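 	 * They are kept in an AVL tree keyed by start address, so finding
 	 * the region containing an address means locating the nearest
 	 * region at or below it and range-checking. Sketch along the lines
 	 * of map_lookup() in region.c (assumed shape, not a verbatim copy):
 	 *
 	 *	region_iter iter;
 	 *	region_start_iter(&vmp->vm_regions_avl, &iter, addr,
 	 *		AVL_LESS_EQUAL);
 	 *	if((vr = region_get_iter(&iter)) != NULL &&
 	 *	   addr >= vr->vaddr && addr < vr->vaddr + vr->length)
 	 *		return vr;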
 	 */
 	region_avl vm_regions_avl;
 	vir_bytes vm_region_top;	/* highest vaddr last inserted */
-	bitchunk_t vm_call_mask[VM_CALL_MASK_SIZE];
-
-	/* State for requests pending to be done to vfs on behalf of
-	 * this process.
-	 */
-	callback_t vm_callback;	/* function to call on vfs reply */
-	int vm_callback_type;	/* expected message type */
-
 	int vm_slot;		/* process table slot */
-
-	union {
-		struct {
-			cp_grant_id_t gid;
-		} open;	/* VM_VFS_OPEN */
-	} vm_state;	/* Callback state. */
 #if VMSTATS
 	int vm_bytecopies;
 #endif
diff --git a/test/Makefile b/test/Makefile
index ac2d4c6f5..7d4762dc4 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -36,10 +36,9 @@ OBJS.test71+= testcache.o
 
 OBJS.test72+= testcache.o
 LDADD.test72+= -lminixfs
 
-# temporarily disabled until 2ndary cache is back
-#PROGS += testvm
-#OBJS.testvm+= testcache.o
-#LDADD.testvm+= -lsys -ltimers -lminlib -static
+PROGS += testvm
+OBJS.testvm+= testcache.o
+LDADD.testvm+= -lsys -ltimers -lminlib -static
 
 FILES += testvm.conf
 
@@ -48,7 +47,7 @@ MINIX_TESTS= \
 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
 41 42 43 44 45 46 48 49 50 52 53 54 55 56 58 59 60 \
-61 64 65 66 67 68 69 70 71 72 # 73 (2ndary cache)
+61 64 65 66 67 68 69 70 71 72 73
 
 .if ${MACHINE_ARCH} == "i386"
 MINIX_TESTS+= \
diff --git a/test/test72.c b/test/test72.c
index f376497ca..a25d1e6a1 100644
--- a/test/test72.c
+++ b/test/test72.c
@@ -114,29 +114,37 @@ static void allocate(int b)
 ssize_t bdev_gather(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
 {
-	int i, block;
+	int i;
 	ssize_t tot = 0;
 	assert(dev == MYDEV);
 	assert(curblocksize > 0);
 	assert(!(pos % curblocksize));
-	block = pos / curblocksize;
 	for(i = 0; i < count; i++) {
-		int subblocks;
+		int subpages, block, block_off;
 		char *data = (char *) vec[i].iov_addr;
-		assert(vec[i].iov_size > 0);
-		assert(!(vec[i].iov_size % curblocksize));
-		subblocks = vec[i].iov_size / curblocksize;
-		while(subblocks > 0) {
-			assert(block > 0);
+		assert(!(pos % curblocksize));
+		block = pos / curblocksize;
+		block_off = pos % curblocksize;
+		assert(!(vec[i].iov_size % PAGE_SIZE));
+		subpages = vec[i].iov_size / PAGE_SIZE;
+		while(subpages > 0) {
+			assert(block >= 0);
 			assert(block < MAXBLOCKS);
+			assert(block_off >= 0);
+			assert(block_off < curblocksize);
 			if(!writtenblocks[block]) {
 				allocate(block);
 			}
-			memcpy(data, writtenblocks[block], curblocksize);
-			block++;
-			subblocks--;
-			data += curblocksize;
-			tot += curblocksize;
+			memcpy(data, writtenblocks[block] + block_off,
+				PAGE_SIZE);
+			subpages--;
+			data += PAGE_SIZE;
+			tot += PAGE_SIZE;
+			block_off += PAGE_SIZE;
+			if(block_off >= curblocksize) {
+				block++;
+				block_off = 0;
+			}
 		}
 	}
@@ -156,7 +161,7 @@ bdev_scatter(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
 		int subblocks;
 		char *data = (char *) vec[i].iov_addr;
 		assert(vec[i].iov_size > 0);
-		assert(!(vec[i].iov_size % curblocksize));
+		assert(!(vec[i].iov_size % PAGE_SIZE));
 		subblocks = vec[i].iov_size / curblocksize;
 		while(subblocks > 0) {
 			assert(block >= 0);
@@ -263,6 +268,18 @@ u32_t sqrt_approx(u32_t v)
 	return (u32_t) sqrt(v);
 }
 
+int vm_set_cacheblock(void *block, u32_t dev, u64_t dev_offset,
+	u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize)
+{
+	return ENOSYS;
+}
+
+void *vm_map_cacheblock(u32_t dev, u64_t dev_offset,
+	u64_t ino, u64_t ino_offset, u32_t *flags, int blocksize)
+{
+	return MAP_FAILED;
+}
+
 int main(int argc, char *argv[])
 {
diff --git a/test/testvm.c b/test/testvm.c
index dacf31a16..d42d211a9 100644
--- a/test/testvm.c
+++ b/test/testvm.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -13,6 +14,9 @@
 #include "common.h"
 #include "testcache.h"
 
+#define MYMAJOR	40	/* doesn't really matter, shouldn't be NO_DEV though */
+#define MYDEV	makedev(MYMAJOR, 1)
+
 static char *pipefilename = NULL, *progname;
 
 int pipefd = -1;
 
@@ -23,20 +27,30 @@ static char *bdata = NULL;
 
 int dowriteblock(int b, int blocksize, u32_t seed, char *block)
 {
 	int r;
+	char *bdata;
+	int mustset = 0;
+	u64_t dev_off = (u64_t) b * blocksize;
 
-	r=vm_yield_block_get_block(VM_BLOCKID_NONE, b, bdata, blocksize);
-
-	if(r != OK && r != ESRCH) {
-		printf("dowriteblock: vm_yield_block_get_block get %d\n", r);
-		exit(1);
+	if((bdata = vm_map_cacheblock(MYDEV, dev_off,
+		VMC_NO_INODE, 0, NULL, blocksize)) == MAP_FAILED) {
+		if((bdata = minix_mmap(0, blocksize,
+			PROT_READ|PROT_WRITE, MAP_ANON, -1, 0)) == MAP_FAILED) {
+			printf("minix_mmap failed\n");
+			exit(1);
+		}
+		mustset = 1;
 	}
 
 	memcpy(bdata, block, blocksize);
 
-	r=vm_yield_block_get_block(b, VM_BLOCKID_NONE, bdata, blocksize);
+	if(mustset && (r=vm_set_cacheblock(bdata, MYDEV, dev_off,
+		VMC_NO_INODE, 0, NULL, blocksize)) != OK) {
+		printf("dowriteblock: vm_set_cacheblock failed %d\n", r);
+		exit(1);
+	}
 
-	if(r != OK) {
-		printf("dowriteblock: vm_yield_block_get_block yield %d\n", r);
+	if(minix_munmap(bdata, blocksize) < 0) {
+		printf("dowriteblock: minix_munmap failed\n");
 		exit(1);
 	}
 
@@ -45,28 +59,25 @@ int dowriteblock(int b, int blocksize, u32_t seed, char *block)
 
 int readblock(int b, int blocksize, u32_t seed, char *block)
 {
-	int r;
+	char *bdata;
+	u64_t dev_off = (u64_t) b * blocksize;
 
-	r=vm_yield_block_get_block(VM_BLOCKID_NONE, b, bdata, blocksize);
-	if(r == ESRCH) {
+	if((bdata = vm_map_cacheblock(MYDEV, dev_off,
+		VMC_NO_INODE, 0, NULL, blocksize)) == MAP_FAILED) {
 		return OK_BLOCK_GONE;
 	}
 
-	if(r != OK) {
-		printf("readblock: vm_yield_block_get_block get %d\n", r);
-		exit(1);
-	}
 
 	memcpy(block, bdata, blocksize);
 
-	r=vm_yield_block_get_block(b, VM_BLOCKID_NONE, bdata, blocksize);
-	if(r != OK) {
-		printf("readblock: vm_yield_block_get_block yield %d\n", r);
+
+	if(minix_munmap(bdata, blocksize) < 0) {
+		printf("readblock: minix_munmap failed\n");
 		exit(1);
 	}
 
 	return blocksize;
 }
 
-void testend(void) { vm_forgetblocks(); }
+void testend(void) { }
 
 static void writepipe(struct info *i)
diff --git a/test/testvm.conf b/test/testvm.conf
index d2c6907b7..24aed285e 100644
--- a/test/testvm.conf
+++ b/test/testvm.conf
@@ -1,7 +1,7 @@
 service testvm {
 	ipc ALL;		# All system ipc targets allowed
 	system BASIC;		# Only basic kernel calls allowed
-	vm BASIC;
+	vm MAPCACHEPAGE SETCACHEPAGE;
 	io NONE;		# No I/O range allowed
 	irq NONE;		# No IRQ allowed
 	sigmgr rs;		# Signal manager is RS
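
Taken together, the testvm.c changes above show the caller's side of the new
pair of VM calls: vm_map_cacheblock() asks VM to map in a block it already
caches, and vm_set_cacheblock() publishes a freshly filled page to the cache.
A condensed sketch of that write path, modeled on dowriteblock() above (the
function name, header locations and errno-style return values are
illustrative assumptions, not part of this patch):

	#include <string.h>
	#include <errno.h>
	#include <sys/mman.h>	/* minix_mmap(), MAP_FAILED (assumed) */
	#include <minix/vm.h>	/* vm_map_cacheblock(), vm_set_cacheblock(),
				 * VMC_NO_INODE -- assumed home of the new API */

	int cache_write_block(u32_t dev, int b, int blocksize, const char *block)
	{
		u64_t dev_off = (u64_t) b * blocksize;
		char *data;
		int mustset = 0;

		/* Reuse VM's copy of this device block if it has one. */
		if((data = vm_map_cacheblock(dev, dev_off, VMC_NO_INODE, 0,
			NULL, blocksize)) == MAP_FAILED) {
			/* Not cached: fill fresh anonymous memory instead. */
			if((data = minix_mmap(0, blocksize, PROT_READ|PROT_WRITE,
				MAP_ANON, -1, 0)) == MAP_FAILED)
				return ENOMEM;
			mustset = 1;	/* VM does not know this page yet */
		}

		memcpy(data, block, blocksize);

		/* Publish a newly filled page to VM's cache exactly once. */
		if(mustset && vm_set_cacheblock(data, dev, dev_off, VMC_NO_INODE,
			0, NULL, blocksize) != OK)
			return EIO;

		return minix_munmap(data, blocksize);
	}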