diff --git a/etc/system.conf b/etc/system.conf index 15f37ff1f..b920a6993 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -109,7 +109,7 @@ service mfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm MAPCACHEPAGE SETCACHEPAGE CLEARCACHE; + vm MAPCACHEPAGE SETCACHEPAGE FORGETCACHEPAGE CLEARCACHE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -136,7 +136,7 @@ service ext2 { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm MAPCACHEPAGE SETCACHEPAGE CLEARCACHE; + vm MAPCACHEPAGE SETCACHEPAGE FORGETCACHEPAGE CLEARCACHE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS @@ -149,7 +149,6 @@ service pfs { ipc ALL_SYS; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm MAPCACHEPAGE SETCACHEPAGE CLEARCACHE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS diff --git a/minix/commands/service/parse.c b/minix/commands/service/parse.c index 264fb432b..26a91a124 100644 --- a/minix/commands/service/parse.c +++ b/minix/commands/service/parse.c @@ -750,6 +750,7 @@ struct { "PROCCTL", VM_PROCCTL }, { "MAPCACHEPAGE", VM_MAPCACHEPAGE }, { "SETCACHEPAGE", VM_SETCACHEPAGE }, + { "FORGETCACHEPAGE", VM_FORGETCACHEPAGE }, { "CLEARCACHE", VM_CLEARCACHE }, { "VFS_MMAP", VM_VFS_MMAP }, { "VFS_REPLY", VM_VFS_REPLY }, diff --git a/minix/fs/ext2/balloc.c b/minix/fs/ext2/balloc.c index bef14964c..947e9f538 100644 --- a/minix/fs/ext2/balloc.c +++ b/minix/fs/ext2/balloc.c @@ -329,6 +329,13 @@ void free_block(struct super_block *sp, bit_t bit_returned) if (bit_returned < sp->s_bsearch) sp->s_bsearch = bit_returned; + + /* Also tell libminixfs, so that 1) if it has this block in its cache, it can + * mark it as clean, thus reducing useless writes, and 2) it can tell VM that + * any previous inode association is to be broken for this block, so that the + * block will not be mapped in erroneously later on. + */ + lmfs_free_block(sp->s_dev, (block_t)bit_returned); } diff --git a/minix/fs/ext2/write.c b/minix/fs/ext2/write.c index 389ca1716..638bf4e42 100644 --- a/minix/fs/ext2/write.c +++ b/minix/fs/ext2/write.c @@ -226,20 +226,16 @@ int op; /* special actions */ rip->i_blocks += rip->i_sp->s_sectors_in_block; } /* b1 equals NO_BLOCK only when we are freeing up the indirect block. */ - if(b1 == NO_BLOCK) - lmfs_markclean(bp); - else + if(b1 != NO_BLOCK) lmfs_markdirty(bp); put_block(bp, INDIRECT_BLOCK); } /* If the single indirect block isn't there (or was just freed), * see if we have to keep the double indirect block, if any. - * If we don't have to keep it, don't bother writing it out. */ if (b1 == NO_BLOCK && !single && b2 != NO_BLOCK && empty_indir(bp_dindir, rip->i_sp)) { - lmfs_markclean(bp_dindir); free_block(rip->i_sp, b2); rip->i_blocks -= rip->i_sp->s_sectors_in_block; b2 = NO_BLOCK; @@ -252,11 +248,9 @@ int op; /* special actions */ } /* If the double indirect block isn't there (or was just freed), * see if we have to keep the triple indirect block, if any. - * If we don't have to keep it, don't bother writing it out. */ if (b2 == NO_BLOCK && triple && b3 != NO_BLOCK && empty_indir(bp_tindir, rip->i_sp)) { - lmfs_markclean(bp_tindir); free_block(rip->i_sp, b3); rip->i_blocks -= rip->i_sp->s_sectors_in_block; rip->i_block[EXT2_TIND_BLOCK] = NO_BLOCK; diff --git a/minix/fs/mfs/cache.c b/minix/fs/mfs/cache.c index 147332c40..ed8300851 100644 --- a/minix/fs/mfs/cache.c +++ b/minix/fs/mfs/cache.c @@ -88,6 +88,12 @@ void free_zone( bit = (bit_t) (numb - (zone_t) (sp->s_firstdatazone - 1)); free_bit(sp, ZMAP, bit); if (bit < sp->s_zsearch) sp->s_zsearch = bit; + + /* Also tell libminixfs, so that 1) if it has a block for this bit, it can + * mark it as clean, thus reducing useless writes, and 2) it can tell VM that + * any previous inode association is to be broken for this block, so that the + * block will not be mapped in erroneously later on. + */ + assert(sp->s_log_zone_size == 0); /* otherwise we need a loop here.. */ + lmfs_free_block(dev, (block_t)numb); } - - diff --git a/minix/fs/mfs/write.c b/minix/fs/mfs/write.c index 3015b8b3d..3a87dd56f 100644 --- a/minix/fs/mfs/write.c +++ b/minix/fs/mfs/write.c @@ -165,7 +165,7 @@ int op; /* special actions */ wr_indir(bp, ex, new_zone); } /* z1 equals NO_ZONE only when we are freeing up the indirect block. */ - if(z1 == NO_ZONE) { MARKCLEAN(bp); } else { MARKDIRTY(bp); } + if(z1 != NO_ZONE) MARKDIRTY(bp); put_block(bp, INDIRECT_BLOCK); } @@ -175,7 +175,6 @@ int op; /* special actions */ */ if(z1 == NO_ZONE && !single && z2 != NO_ZONE && empty_indir(bp_dindir, rip->i_sp)) { - MARKCLEAN(bp_dindir); free_zone(rip->i_dev, z2); rip->i_zone[zones+1] = NO_ZONE; } diff --git a/minix/include/minix/com.h b/minix/include/minix/com.h index c3d42cc77..fe8267513 100644 --- a/minix/include/minix/com.h +++ b/minix/include/minix/com.h @@ -643,8 +643,11 @@ /* To VM: identify cache block in FS */ #define VM_SETCACHEPAGE (VM_RQ_BASE+27) +/* To VM: forget cache block in FS */ +#define VM_FORGETCACHEPAGE (VM_RQ_BASE+28) + /* To VM: clear all cache blocks for a device */ -#define VM_CLEARCACHE (VM_RQ_BASE+28) +#define VM_CLEARCACHE (VM_RQ_BASE+29) /* To VFS: fields for request from VM. */ # define VFS_VMCALL_REQ m10_i1 diff --git a/minix/include/minix/libminixfs.h b/minix/include/minix/libminixfs.h index 8b361aa84..207ca83d3 100644 --- a/minix/include/minix/libminixfs.h +++ b/minix/include/minix/libminixfs.h @@ -45,8 +45,9 @@ void lmfs_buf_pool(int new_nr_bufs); struct buf *lmfs_get_block(dev_t dev, block64_t block,int only_search); struct buf *lmfs_get_block_ino(dev_t dev, block64_t block,int only_search, ino_t ino, u64_t off); -void lmfs_invalidate(dev_t device); void lmfs_put_block(struct buf *bp, int block_type); +void lmfs_free_block(dev_t dev, block64_t block); +void lmfs_invalidate(dev_t device); void lmfs_rw_scattered(dev_t, struct buf **, int, int); void lmfs_setquiet(int q); void lmfs_cache_reevaluate(dev_t dev); diff --git a/minix/include/minix/vm.h b/minix/include/minix/vm.h index 9775d7133..b8a1c399e 100644 --- a/minix/include/minix/vm.h +++ b/minix/include/minix/vm.h @@ -21,8 +21,6 @@ int vm_update(endpoint_t src_e, endpoint_t dst_e); int vm_memctl(endpoint_t ep, int req); int vm_query_exit(endpoint_t *endpt); int vm_watch_exit(endpoint_t ep); -int vm_forgetblock(u64_t id); -void vm_forgetblocks(void); int minix_vfs_mmap(endpoint_t who, off_t offset, size_t len, dev_t dev, ino_t ino, int fd, u32_t vaddr, u16_t clearend, u16_t flags); @@ -73,10 +71,9 @@ int vm_procctl_handlemem(endpoint_t ep, vir_bytes m1, vir_bytes m2, int wr); int vm_set_cacheblock(void *block, dev_t dev, off_t dev_offset, ino_t ino, off_t ino_offset, u32_t *flags, int blocksize, int setflags); - void *vm_map_cacheblock(dev_t dev, off_t dev_offset, ino_t ino, off_t ino_offset, u32_t *flags, int blocksize); - +int vm_forget_cacheblock(dev_t dev, off_t dev_offset, int blocksize); int vm_clear_cache(dev_t dev); /* flags for vm cache functions */ diff --git a/minix/lib/libminixfs/cache.c b/minix/lib/libminixfs/cache.c index 9c6533d54..0b2ad2b1b 100644 --- a/minix/lib/libminixfs/cache.c +++ b/minix/lib/libminixfs/cache.c @@ -239,6 +239,27 @@ static void freeblock(struct buf *bp) } else assert(!bp->data); } +/*===========================================================================* + * find_block * + *===========================================================================*/ +static struct buf *find_block(dev_t dev, block64_t block) +{ +/* Search the hash chain for (dev, block). Return the buffer structure if + * found, or NULL otherwise. + */ + struct buf *bp; + int b; + + assert(dev != NO_DEV); + + b = BUFHASH(block); + for (bp = buf_hash[b]; bp != NULL; bp = bp->lmfs_hash) + if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) + return bp; + + return NULL; +} + /*===========================================================================* * lmfs_get_block_ino * *===========================================================================*/ @@ -284,50 +305,46 @@ struct buf *lmfs_get_block_ino(dev_t dev, block64_t block, int only_search, util_stacktrace(); } - /* Search the hash chain for (dev, block). */ - b = BUFHASH(block); - bp = buf_hash[b]; - while (bp != NULL) { - if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) { - if(bp->lmfs_flags & VMMC_EVICTED) { - /* We had it but VM evicted it; invalidate it. */ - ASSERT(bp->lmfs_count == 0); - ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); - ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); - bp->lmfs_dev = NO_DEV; - bp->lmfs_bytes = 0; - bp->data = NULL; - break; - } - /* Block needed has been found. */ - if (bp->lmfs_count == 0) { - rm_lru(bp); - ASSERT(bp->lmfs_needsetcache == 0); - ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); - bp->lmfs_flags |= VMMC_BLOCK_LOCKED; - } - raisecount(bp); - ASSERT(bp->lmfs_bytes == fs_block_size); - ASSERT(bp->lmfs_dev == dev); - ASSERT(bp->lmfs_dev != NO_DEV); - ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); - ASSERT(bp->data); + /* See if the block is in the cache. If so, we can return it right away. */ + bp = find_block(dev, block); + if (bp != NULL && !(bp->lmfs_flags & VMMC_EVICTED)) { + /* Block needed has been found. */ + if (bp->lmfs_count == 0) { + rm_lru(bp); + ASSERT(bp->lmfs_needsetcache == 0); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + /* FIXME: race condition against the VMMC_EVICTED check */ + bp->lmfs_flags |= VMMC_BLOCK_LOCKED; + } + raisecount(bp); + ASSERT(bp->lmfs_bytes == fs_block_size); + ASSERT(bp->lmfs_dev == dev); + ASSERT(bp->lmfs_dev != NO_DEV); + ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); + ASSERT(bp->data); - if(ino != VMC_NO_INODE) { - if(bp->lmfs_inode == VMC_NO_INODE - || bp->lmfs_inode != ino - || bp->lmfs_inode_offset != ino_off) { - bp->lmfs_inode = ino; - bp->lmfs_inode_offset = ino_off; - bp->lmfs_needsetcache = 1; - } + if(ino != VMC_NO_INODE) { + if(bp->lmfs_inode == VMC_NO_INODE + || bp->lmfs_inode != ino + || bp->lmfs_inode_offset != ino_off) { + bp->lmfs_inode = ino; + bp->lmfs_inode_offset = ino_off; + bp->lmfs_needsetcache = 1; } + } - return(bp); - } else { - /* This block is not the one sought. */ - bp = bp->lmfs_hash; /* move to next block on hash chain */ - } + return(bp); + } + + /* We had the block in the cache but VM evicted it; invalidate it. */ + if (bp != NULL) { + assert(bp->lmfs_flags & VMMC_EVICTED); + ASSERT(bp->lmfs_count == 0); + ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); + ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); + bp->lmfs_dev = NO_DEV; + bp->lmfs_bytes = 0; + bp->data = NULL; } /* Desired block is not on available chain. Find a free block to use. */ @@ -440,7 +457,7 @@ void lmfs_put_block( if (bp->lmfs_count != 0) return; /* block is still in use */ /* Put this block back on the LRU chain. */ - if (dev == DEV_RAM || (block_type & ONE_SHOT)) { + if (dev == NO_DEV || dev == DEV_RAM || (block_type & ONE_SHOT)) { /* Block probably won't be needed quickly. Put it on front of chain. * It will be the next block to be evicted from the cache. */ @@ -483,7 +500,48 @@ void lmfs_put_block( } } bp->lmfs_needsetcache = 0; +} +/*===========================================================================* + * lmfs_free_block * + *===========================================================================*/ +void lmfs_free_block(dev_t dev, block64_t block) +{ +/* The file system has just freed the given block. The block may previously + * have been in use as data block for an inode. Therefore, we now need to tell + * VM that the block is no longer associated with an inode. If we fail to do so + * and the inode now has a hole at this location, mapping in the hole would + * yield the old block contents rather than a zeroed page. In addition, if the + * block is in the cache, it will be removed, even if it was dirty. + */ + struct buf *bp; + int r; + + /* Tell VM to forget about the block. The primary purpose of this call is to + * break the inode association, but since the block is part of a mounted file + * system, it is not expected to be accessed directly anyway. So, save some + * cache memory by throwing it out of the VM cache altogether. + */ + if (vmcache) { + if ((r = vm_forget_cacheblock(dev, block * fs_block_size, + fs_block_size)) != OK) + printf("libminixfs: vm_forget_cacheblock failed (%d)\n", r); + } + + if ((bp = find_block(dev, block)) != NULL) { + lmfs_markclean(bp); + + /* Invalidate the block. The block may or may not be in use right now, + * so don't be smart about freeing memory or repositioning in the LRU. + */ + bp->lmfs_dev = NO_DEV; + } + + /* Note that this is *not* the right place to implement TRIM support. Even + * though the block is freed, on the device it may still be part of a + * previous checkpoint or snapshot of some sort. Only the file system can + * be trusted to decide which blocks can be reused on the device! + */ } void lmfs_cache_reevaluate(dev_t dev) @@ -577,6 +635,10 @@ void lmfs_invalidate( } } + /* Clear the cache even if VM caching is disabled for the file system: + * caching may be disabled as side effect of an error, leaving blocks behind + * in the actual VM cache. + */ vm_clear_cache(device); } diff --git a/minix/lib/libsys/vm_cache.c b/minix/lib/libsys/vm_cache.c index be31981c2..c91e4e396 100644 --- a/minix/lib/libsys/vm_cache.c +++ b/minix/lib/libsys/vm_cache.c @@ -65,6 +65,14 @@ int vm_set_cacheblock(void *block, dev_t dev, off_t dev_offset, ino, ino_offset, flags, blocksize, setflags); } +int vm_forget_cacheblock(dev_t dev, off_t dev_offset, int blocksize) +{ + message m; + + return vm_cachecall(&m, VM_FORGETCACHEPAGE, NULL, dev, dev_offset, + VMC_NO_INODE, 0, 0, blocksize, 0); +} + int vm_clear_cache(dev_t dev) { diff --git a/minix/servers/vm/main.c b/minix/servers/vm/main.c index 73cf0e2c4..0fd120372 100644 --- a/minix/servers/vm/main.c +++ b/minix/servers/vm/main.c @@ -507,6 +507,7 @@ void init_vm(void) /* Cache blocks. */ CALLMAP(VM_MAPCACHEPAGE, do_mapcache); CALLMAP(VM_SETCACHEPAGE, do_setcache); + CALLMAP(VM_FORGETCACHEPAGE, do_forgetcache); CALLMAP(VM_CLEARCACHE, do_clearcache); /* getrusage */ diff --git a/minix/servers/vm/mem_cache.c b/minix/servers/vm/mem_cache.c index 6b1ace42a..1f6942da5 100644 --- a/minix/servers/vm/mem_cache.c +++ b/minix/servers/vm/mem_cache.c @@ -88,7 +88,7 @@ int do_mapcache(message *msg) { dev_t dev = msg->m_vmmcp.dev; - off_t dev_off = msg->m_vmmcp.dev_offset; + uint64_t dev_off = msg->m_vmmcp.dev_offset; off_t ino_off = msg->m_vmmcp.ino_offset; int n; phys_bytes bytes = msg->m_vmmcp.pages * VM_PAGE_SIZE; @@ -173,7 +173,7 @@ do_setcache(message *msg) { int r; dev_t dev = msg->m_vmmcp.dev; - off_t dev_off = msg->m_vmmcp.dev_offset; + uint64_t dev_off = msg->m_vmmcp.dev_offset; off_t ino_off = msg->m_vmmcp.ino_offset; int flags = msg->m_vmmcp.flags; int n; @@ -252,6 +252,38 @@ do_setcache(message *msg) return OK; } +/* + * Forget all pages associated to a particular block in the cache. + */ +int +do_forgetcache(message *msg) +{ + struct cached_page *hb; + dev_t dev; + uint64_t dev_off; + phys_bytes bytes, offset; + + dev = msg->m_vmmcp.dev; + dev_off = msg->m_vmmcp.dev_offset; + bytes = msg->m_vmmcp.pages * VM_PAGE_SIZE; + + if (bytes < VM_PAGE_SIZE) + return EINVAL; + + if (dev_off % PAGE_SIZE) { + printf("VM: unaligned cache operation\n"); + return EFAULT; + } + + for (offset = 0; offset < bytes; offset += VM_PAGE_SIZE) { + if ((hb = find_cached_page_bydev(dev, dev_off + offset, + VMC_NO_INODE, 0 /*ino_off*/, 0 /*touchlru*/)) != NULL) + rmcache(hb); + } + + return OK; +} + /* * A file system wants to invalidate all pages belonging to a certain device. */ diff --git a/minix/servers/vm/proto.h b/minix/servers/vm/proto.h index e61ea9f74..b96c31e27 100644 --- a/minix/servers/vm/proto.h +++ b/minix/servers/vm/proto.h @@ -221,6 +221,7 @@ void shared_setsource(struct vir_region *vr, endpoint_t ep, struct vir_region *s /* mem_cache.c */ int do_mapcache(message *m); int do_setcache(message *m); +int do_forgetcache(message *m); int do_clearcache(message *m); /* cache.c */ diff --git a/minix/tests/test72.c b/minix/tests/test72.c index 1723bced4..9e13f16dc 100644 --- a/minix/tests/test72.c +++ b/minix/tests/test72.c @@ -241,6 +241,11 @@ void *vm_map_cacheblock(dev_t dev, off_t dev_offset, return MAP_FAILED; } +int vm_forget_cacheblock(dev_t dev, off_t dev_offset, int blocksize) +{ + return 0; +} + int vm_clear_cache(dev_t dev) { return 0; diff --git a/minix/tests/test74.c b/minix/tests/test74.c index a0b3b1b40..b8cd36283 100644 --- a/minix/tests/test74.c +++ b/minix/tests/test74.c @@ -532,10 +532,168 @@ nonedev_regression(void) close(fd); } +/* + * Regression test for a nasty memory-mapped file corruption bug, which is not + * easy to reproduce but, before being solved, did occur in practice every once + * in a while. The executive summary is that through stale inode associations, + * VM could end up using an old block to satisfy a memory mapping. + * + * This subtest relies on a number of assumptions regarding allocation and + * reuse of inode numbers and blocks. These assumptions hold for MFS but + * possibly no other file system. However, if the subtest's assumptions are + * not met, it will simply succeed. + */ +static void +corruption_regression(void) +{ + char *ptr, *buf; + struct statvfs sf; + struct stat st; + size_t block_size; + off_t size; + int fd, fd2; + + subtest = 1; + + if (statvfs(".", &sf) != 0) e(0); + block_size = sf.f_bsize; + + if ((buf = malloc(block_size * 2)) == NULL) e(0); + + /* + * We first need a file that is just large enough that it requires the + * allocation of a metadata block - an indirect block - when more data + * is written to it. This is fileA. We keep it open throughout the + * test so we can unlink it immediately. + */ + if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) + e(0); + if (unlink("fileA") != 0) e(0); + + /* + * Write to fileA until its next block requires the allocation of an + * additional metadata block - an indirect block. + */ + size = 0; + memset(buf, 'A', block_size); + do { + /* + * Repeatedly write an extra block, until the file consists of + * more blocks than just the file data. + */ + if (write(fd, buf, block_size) != block_size) e(0); + size += block_size; + if (size >= block_size * 64) { + /* + * It doesn't look like this is going to work. + * Skip this subtest altogether. + */ + if (close(fd) != 0) e(0); + free(buf); + + return; + } + if (fstat(fd, &st) != 0) e(0); + } while (st.st_blocks * 512 == size); + + /* Once we get there, go one step back by truncating by one block. */ + size -= block_size; /* for MFS, size will end up being 7*block_size */ + if (ftruncate(fd, size) != 0) e(0); + + /* + * Create a first file, fileB, and write two blocks to it. FileB's + * blocks are going to end up in the secondary VM cache, associated to + * fileB's inode number (and two different offsets within the file). + * The block cache does not know about files getting deleted, so we can + * unlink fileB immediately after creating it. So far so good. + */ + if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) + e(0); + if (unlink("fileB") != 0) e(0); + memset(buf, 'B', block_size * 2); + if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); + if (close(fd2) != 0) e(0); + + /* + * Write one extra block to fileA, hoping that this causes allocation + * of a metadata block as well. This is why we tried to get fileA to + * the point that one more block would also require the allocation of a + * metadata block. Our intent is to recycle the blocks that we just + * allocated and freed for fileB. As of writing, for the metadata + * block, this will *not* break the association with fileB's inode, + * which by itself is not a problem, yet crucial to reproducing + * the actual problem a bit later. Note that the test does not rely on + * whether the file system allocates the data block or the metadata + * block first, although it does need reverse deallocation (see below). + */ + memset(buf, 'A', block_size); + if (write(fd, buf, block_size) != block_size) e(0); + + /* + * Create a new file, fileC, which recycles the inode number of fileB, + * but uses two new blocks to store its data. These new blocks will + * get associated to the fileB inode number, and one of them will + * thereby eclipse (but not remove) the association of fileA's metadata + * block to the inode of fileB. + */ + if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1) + e(0); + if (unlink("fileC") != 0) e(0); + memset(buf, 'C', block_size * 2); + if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); + if (close(fd2) != 0) e(0); + + /* + * Free up the extra fileA blocks for reallocation, in particular + * including the metadata block. Again, this will not affect the + * contents of the VM cache in any way. FileA's metadata block remains + * cached in VM, with the inode association for fileB's block. + */ + if (ftruncate(fd, size) != 0) e(0); + + /* + * Now create yet one more file, fileD, which also recycles the inode + * number of fileB and fileC. Write two blocks to it; these blocks + * should recycle the blocks we just freed. One of these is fileA's + * just-freed metadata block, for which the new inode association will + * be equal to the inode association it had already (as long as blocks + * are freed in reverse order of their allocation, which happens to be + * the case for MFS). As a result, the block is not updated in the VM + * cache, and VM will therefore continue to see the inode association + * for the corresponding block of fileC which is still in the VM cache. + */ + if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1) + e(0); + memset(buf, 'D', block_size * 2); + if (write(fd2, buf, block_size * 2) != block_size * 2) e(0); + + ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_FILE, fd2, 0); + if (ptr == MAP_FAILED) e(0); + + /* + * Finally, we can test the issue. Since fileC's block is still the + * block for which VM has the corresponding inode association, VM will + * now find and map in fileC's block, instead of fileD's block. The + * result is that we get a memory-mapped area with stale contents, + * different from those of the underlying file. + */ + if (memcmp(buf, ptr, block_size * 2)) e(0); + + /* Clean up. */ + if (munmap(ptr, block_size * 2) != 0) e(0); + + if (close(fd2) != 0) e(0); + if (unlink("fileD") != 0) e(0); + + if (close(fd) != 0) e(0); + + free(buf); +} + int main(int argc, char *argv[]) { - int iter = 2; + int i, iter = 2; start(74); @@ -543,6 +701,14 @@ main(int argc, char *argv[]) nonedev_regression(); + /* + * Any inode or block allocation happening concurrently with this + * subtest will make the subtest succeed without testing the actual + * issue. Thus, repeat the subtest a fair number of times. + */ + for (i = 0; i < 10; i++) + corruption_regression(); + test_memory_types_vs_operations(); makefiles(MAXFILES); diff --git a/minix/tests/testvm.conf b/minix/tests/testvm.conf index ec42c0e25..7be4436f8 100644 --- a/minix/tests/testvm.conf +++ b/minix/tests/testvm.conf @@ -1,7 +1,7 @@ service testvm { ipc ALL; # All system ipc targets allowed system BASIC; # Only basic kernel calls allowed - vm MAPCACHEPAGE SETCACHEPAGE CLEARCACHE; + vm MAPCACHEPAGE SETCACHEPAGE FORGETCACHEPAGE CLEARCACHE; io NONE; # No I/O range allowed irq NONE; # No IRQ allowed sigmgr rs; # Signal manager is RS