/* Test 74 - mmap functionality & regression test.
 *
 * This test exercises some basic functionality of mmap, and also some
 * cases that are quite complex for the system to handle.
 *
 * Memory pages are generally made available on demand. Memory copying
 * is done by the kernel. As the kernel may encounter pagefaults in
 * legitimate memory ranges (e.g. pages that aren't mapped; pages that
 * are mapped RO as they are COW), it cooperates with VM to make the
 * mappings and let the copy succeed transparently.
 *
 * With file-mapped ranges this can result in a deadlock, if care is
 * not taken, as the copy might be requested by VFS or an FS. This test
 * triggers as many of these states as possible to ensure they are
 * successful or (where appropriate) fail gracefully, i.e. without
 * deadlock.
 *
 * To do this, system calls are done with source or target buffers with
 * missing or readonly mappings, both anonymous and file-mapped. The
 * cache is flushed before mmap() so that we know the mappings should
 * not be present at mmap() time. Then e.g. a read() or write() is
 * executed with that buffer as target. This triggers an FS copying
 * to or from a missing range that it itself is needed to map in first.
 * VFS detects this, requests VM to map in the pages, which does so with
 * the help of another VFS thread and the FS, and then re-issues the
 * request to the FS.
 *
 * Another case is when VFS itself does such a copy. This is actually
 * unusual, as filenames are already faulted in by the requesting process
 * in libc by strlen(). select() allows such a case, however, so this
 * is tested too. We are satisfied if the call completes.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/ioc_memory.h>
#include <sys/param.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <minix/paths.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>

#include "common.h"
#include "testcache.h"

int max_error = 0; /* make all e()'s fatal */
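
/*
 * dowriteblock()/readblock()/testend() are the block I/O hooks used by the
 * shared cache test code (testcache.h), which main() drives via makefiles()
 * and dotest() below. readblock() additionally cross-checks the pread()
 * result against a private file mapping of the same block, in either order.
 */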

int
dowriteblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pwrite(fd, data, blocksize, offset) < blocksize) {
		perror("pwrite");
		return -1;
	}

	return blocksize;
}

int
readblock(int b, int blocksize, u32_t seed, char *data)
{
	u64_t offset;
	int fd;
	char *mmapdata;
	int pread_first = random() % 2;

	get_fd_offset(b, blocksize, &offset, &fd);

	if(pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if((mmapdata = mmap(NULL, blocksize, PROT_READ, MAP_PRIVATE | MAP_FILE,
		fd, offset)) == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	if(!pread_first) {
		if(pread(fd, data, blocksize, offset) < blocksize) {
			perror("pread");
			return -1;
		}
	}

	if(memcmp(mmapdata, data, blocksize)) {
		fprintf(stderr, "readblock: mmap, pread mismatch\n");
		return -1;
	}

	if(munmap(mmapdata, blocksize) < 0) {
		perror("munmap");
		return -1;
	}

	return blocksize;
}

void testend(void) { }
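
/*
 * Each do_*() operation below performs one system call with 'buf' as its
 * source or target buffer. The caller passes buffers that are anonymous,
 * file-mapped read-write, or file-mapped read-only; 'writable' indicates
 * whether stores into the buffer are expected to succeed or to make the
 * call fail with EFAULT.
 */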

static void do_read(void *buf, int fd, int writable)
{
	ssize_t ret;
	size_t n = PAGE_SIZE;
	struct stat sb;
	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;
	ret = read(fd, buf, n);

	/* if the buffer is writable, it should succeed */
	if(writable) { if(ret != n) e(3); return; }

	/* if the buffer is not writable, it should fail with EFAULT */
	if(ret >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_write(void *buf, int fd, int writable)
{
	size_t n = PAGE_SIZE;
	struct stat sb;
	if(fstat(fd, &sb) < 0) e(1);
	if(S_ISDIR(sb.st_mode)) return;
	if(write(fd, buf, n) != n) e(3);
}

static void do_stat(void *buf, int fd, int writable)
{
	int r;
	r = fstat(fd, (struct stat *) buf);

	/* should succeed if buf is writable */
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_getdents(void *buf, int fd, int writable)
{
	struct stat sb;
	int r;
	if(fstat(fd, &sb) < 0) e(1);
	if(!S_ISDIR(sb.st_mode)) return;	/* OK */
	r = getdents(fd, buf, PAGE_SIZE);
	if(writable) { if(r < 0) e(3); return; }

	/* should fail with EFAULT if buf is not */
	if(r >= 0) e(4);
	if(errno != EFAULT) e(5);
}

static void do_readlink1(void *buf, int fd, int writable)
{
	char target[200];
	/* the system call just has to fail gracefully */
	readlink(buf, target, sizeof(target));
}

#define NODENAME "a"
#define TARGETNAME "b"

static void do_readlink2(void *buf, int fd, int writable)
{
	ssize_t rl;
	unlink(NODENAME);
	if(symlink(TARGETNAME, NODENAME) < 0) e(1);
	rl = readlink(NODENAME, buf, PAGE_SIZE);

	/* if buf is writable, it should succeed, with a certain result */
	if(writable) {
		if(rl < 0) e(2);
		((char *) buf)[rl] = '\0';
		if(strcmp(buf, TARGETNAME)) {
			fprintf(stderr, "readlink: expected %s, got %s\n",
				TARGETNAME, (char *)buf);
			e(3);
		}
		return;
	}

	/* if buf is not writable, it should fail with EFAULT */
	if(rl >= 0) e(4);

	if(errno != EFAULT) e(5);
}

static void do_symlink1(void *buf, int fd, int writable)
{
	int r;
	/* the system call just has to fail gracefully */
	r = symlink(buf, NODENAME);
}

static void do_symlink2(void *buf, int fd, int writable)
{
	int r;
	/* the system call just has to fail gracefully */
	r = symlink(NODENAME, buf);
}

static void do_open(void *buf, int fd, int writable)
{
	int r;
	/* the system call just has to fail gracefully */
	r = open(buf, O_RDONLY);
	if(r >= 0) close(r);
}

static void do_select1(void *buf, int fd, int writable)
{
	int r;
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	r = select(1, buf, NULL, NULL, &timeout);
}

static void do_select2(void *buf, int fd, int writable)
{
	int r;
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	r = select(1, NULL, buf, NULL, &timeout);
}

static void do_select3(void *buf, int fd, int writable)
{
	int r;
	struct timeval timeout = { 0, 200000 };	/* 0.2 sec */
	/* the system call just has to fail gracefully */
	r = select(1, NULL, NULL, buf, &timeout);
}

static void fillfile(int fd, int size)
{
	char *buf = malloc(size);

	if(size < 1 || size % PAGE_SIZE || !buf) { e(1); }
	memset(buf, 'A', size);
	buf[50] = '\0';	/* so it can be used as a filename arg */
	buf[size-1] = '\0';
	if(write(fd, buf, size) != size) { e(2); }
	if(lseek(fd, 0, SEEK_SET) < 0) { e(3); }
	free(buf);
}
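
/*
 * make_buffers() creates two freshly written temporary files, flushes the FS
 * cache so that no pages are resident yet, and maps one file read-write and
 * the other read-only, alongside an anonymous read-write buffer.
 * forget_buffers() unmaps all three, flushes the cache again and closes the
 * files.
 */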

static void make_buffers(int size,
	int *ret_fd_rw, int *ret_fd_ro,
	void **filebuf_rw, void **filebuf_ro, void **anonbuf)
{
	char fn_rw[] = "testfile_rw.XXXXXX", fn_ro[] = "testfile_ro.XXXXXX";
	*ret_fd_rw = mkstemp(fn_rw);
	*ret_fd_ro = mkstemp(fn_ro);

	if(size < 1 || size % PAGE_SIZE) { e(2); }
	if(*ret_fd_rw < 0) { e(1); }
	if(*ret_fd_ro < 0) { e(1); }
	fillfile(*ret_fd_rw, size);
	fillfile(*ret_fd_ro, size);
	if(fcntl(*ret_fd_rw, F_FLUSH_FS_CACHE) < 0) { e(4); }
	if(fcntl(*ret_fd_ro, F_FLUSH_FS_CACHE) < 0) { e(4); }

	if((*filebuf_rw = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_FILE, *ret_fd_rw, 0)) == MAP_FAILED) {
		e(5);
		quit();
	}

	if((*filebuf_ro = mmap(0, size, PROT_READ,
		MAP_PRIVATE | MAP_FILE, *ret_fd_ro, 0)) == MAP_FAILED) {
		e(5);
		quit();
	}

	if((*anonbuf = mmap(0, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
		e(6);
		quit();
	}

	if(unlink(fn_rw) < 0) { e(12); }
	if(unlink(fn_ro) < 0) { e(12); }
}

static void forget_buffers(void *buf1, void *buf2, void *buf3, int fd1, int fd2, int size)
{
	if(munmap(buf1, size) < 0) { e(1); }
	if(munmap(buf2, size) < 0) { e(2); }
	if(munmap(buf3, size) < 0) { e(2); }
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(3); }
	if(close(fd1) < 0) { e(4); }
	if(close(fd2) < 0) { e(4); }
}

#define NEXPERIMENTS 12
struct {
	void (*do_operation)(void * buf, int fd, int writable);
} experiments[NEXPERIMENTS] = {
	{ do_read },
	{ do_write },
	{ do_stat },
	{ do_getdents },
	{ do_readlink1 },
	{ do_readlink2 },
	{ do_symlink1 },
	{ do_symlink2 },
	{ do_open },
	{ do_select1 },
	{ do_select2 },
	{ do_select3 },
};
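
/*
 * Run every experiment against every test fd (regular file, directory,
 * ramdisk, /dev/zero), each time with an anonymous buffer, a read-write
 * file-mapped buffer and a read-only file-mapped buffer. Only the read-only
 * buffer is passed with writable=0, for which buffer-writing calls are
 * expected to fail with EFAULT.
 */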

static void test_memory_types_vs_operations(void)
{
#define NFDS 4
#define BUFSIZE (10 * PAGE_SIZE)
	int exp, fds[NFDS];
	int f = 0, size = BUFSIZE;

	/* open some test fd's */
#define OPEN(fn, flags) { assert(f >= 0 && f < NFDS); \
	fds[f] = open(fn, flags, 0644); if(fds[f] < 0) { e(2); } f++; }
	OPEN("regular", O_RDWR | O_CREAT);
	OPEN(".", O_RDONLY);
	OPEN("/dev/ram", O_RDWR);
	OPEN("/dev/zero", O_RDWR);

	/* make sure the regular file has plenty of size to play with */
	fillfile(fds[0], BUFSIZE);

	/* and the ramdisk too */
	if(ioctl(fds[2], MIOCRAMSIZE, &size) < 0) { e(3); }

	for(exp = 0; exp < NEXPERIMENTS; exp++) {
		for(f = 0; f < NFDS; f++) {
			void *anonmem, *filemem_rw, *filemem_ro;
			int buffd_rw, buffd_ro;

			make_buffers(BUFSIZE, &buffd_rw, &buffd_ro,
				&filemem_rw, &filemem_ro, &anonmem);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(10); }
			experiments[exp].do_operation(anonmem, fds[f], 1);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(11); }
			experiments[exp].do_operation(filemem_rw, fds[f], 1);

			if(lseek(fds[f], 0, SEEK_SET) != 0) { e(12); }
			experiments[exp].do_operation(filemem_ro, fds[f], 0);

			forget_buffers(filemem_rw, filemem_ro, anonmem,
				buffd_rw, buffd_ro, BUFSIZE);
		}
	}
}
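
/*
 * Basic mmap()/write() regression tests: an anonymous mapping with a partial
 * unmap, refusal of a writable MAP_SHARED file mapping, a read-only
 * MAP_SHARED mapping, and a write() whose source buffer consists of two
 * not-yet-faulted private file mappings backed by two different files.
 */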

static void basic_regression(void)
{
	int fd, fd1, fd2;
	ssize_t rb, wr;
	char buf[PAGE_SIZE*2];
	void *block, *block1, *block2;
#define BLOCKSIZE (PAGE_SIZE*10)
	block = mmap(0, BLOCKSIZE, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANON, -1, 0);

	if(block == MAP_FAILED) { e(1); }

	memset(block, 0, BLOCKSIZE);

	/* shrink from bottom */
	munmap(block, PAGE_SIZE);

	/* Next test: use a system call write() to access a block of
	 * unavailable file-mapped memory.
	 *
	 * This is a thorny corner case to make succeed transparently
	 * because
	 * (1) it is a filesystem that is doing the memory access
	 *     (copy from the block1 range in this process to the
	 *     FS) but is also the FS needed to satisfy the range if it
	 *     isn't in the cache.
	 * (2) there are two separate memory regions involved, requiring
	 *     separate VFS requests from VM to properly satisfy, requiring
	 *     some complex state to be kept.
	 */

	fd1 = open("../testsh1", O_RDONLY);
	fd2 = open("../testsh2", O_RDONLY);
	if(fd1 < 0 || fd2 < 0) { e(2); }

	/* just check that we can't mmap() a file writable with MAP_SHARED */
	if(mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
		MAP_SHARED | MAP_FILE, fd1, 0) != MAP_FAILED) {
		e(1);
	}

	/* check that we can mmap() a file MAP_SHARED readonly */
	if(mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_FILE, fd1, 0) == MAP_FAILED) {
		e(1);
	}

	/* clear cache of files before mmap so pages won't be present already */
	if(fcntl(fd1, F_FLUSH_FS_CACHE) < 0) { e(1); }
	if(fcntl(fd2, F_FLUSH_FS_CACHE) < 0) { e(1); }

#define LOCATION1 (void *) 0x90000000
#define LOCATION2 ((void *)((char *)LOCATION1 + PAGE_SIZE))
	block1 = mmap(LOCATION1, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd1, 0);
	if(block1 == MAP_FAILED) { e(4); }
	if(block1 != LOCATION1) { e(5); }

	block2 = mmap(LOCATION2, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_FILE, fd2, 0);
	if(block2 == MAP_FAILED) { e(10); }
	if(block2 != LOCATION2) { e(11); }

	unlink("testfile");
	fd = open("testfile", O_CREAT | O_RDWR, 0644);
	if(fd < 0) { e(15); }

	/* write() using the mmap()ped memory as buffer */

	if((wr=write(fd, LOCATION1, sizeof(buf))) != sizeof(buf)) {
		fprintf(stderr, "wrote %zd bytes instead of %zu\n",
			wr, sizeof(buf));
		e(20);
		quit();
	}

	/* verify written contents */

	if((rb=pread(fd, buf, sizeof(buf), 0)) != sizeof(buf)) {
		if(rb < 0) perror("pread");
		fprintf(stderr, "wrote %zd bytes\n", wr);
		fprintf(stderr, "read %zd bytes instead of %zu\n",
			rb, sizeof(buf));
		e(21);
		quit();
	}

	if(memcmp(buf, LOCATION1, sizeof(buf))) {
		e(22);
		quit();
	}

	close(fd);
	close(fd1);
	close(fd2);
}

/*
 * Test mmap on none-dev file systems - file systems that do not have a buffer
 * cache and therefore have to fake mmap support. We use procfs as the target.
 * The idea is that while we succeed in mapping in /proc/uptime, we also get
 * a new uptime value every time we map in the page -- VM must not cache it.
 */
static void
nonedev_regression(void)
{
	int fd, fd2;
	char *buf;
	unsigned long uptime1, uptime2, uptime3;

	subtest++;

	if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1);

	buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(2);

	if (buf[4095] != 0) e(3);

	if ((uptime1 = atoi(buf)) == 0) e(4);

	if (munmap(buf, 4096) != 0) e(5);

	sleep(2);

	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE,
	    fd, 0);
	if (buf == MAP_FAILED) e(6);

	if (buf[4095] != 0) e(7);

	if ((uptime2 = atoi(buf)) == 0) e(8);

	if (uptime1 == uptime2) e(9);

	if (munmap(buf, 4096) != 0) e(10);

	sleep(2);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(11);

	if (buf[4095] != 0) e(12);

	if ((uptime3 = atoi(buf)) == 0) e(13);

	if (uptime1 == uptime3) e(14);
	if (uptime2 == uptime3) e(15);

	if (munmap(buf, 4096) != 0) e(16);

	/* Also test page faults not incurred by the process itself. */
	if ((fd2 = open("testfile", O_CREAT | O_TRUNC | O_WRONLY, 0644)) < 0) e(17);

	if (unlink("testfile") != 0) e(18);

	buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0);
	if (buf == MAP_FAILED) e(19);

	if (write(fd2, buf, 10) != 10) e(20);

	if (munmap(buf, 4096) != 0) e(21);

	close(fd2);
	close(fd);
}

/*
 * Regression test for a nasty memory-mapped file corruption bug, which is not
 * easy to reproduce but, before being solved, did occur in practice every once
 * in a while. The executive summary is that through stale inode associations,
 * VM could end up using an old block to satisfy a memory mapping.
 *
 * This subtest relies on a number of assumptions regarding allocation and
 * reuse of inode numbers and blocks. These assumptions hold for MFS but
 * possibly no other file system. However, if the subtest's assumptions are
 * not met, it will simply succeed.
 */
static void
corruption_regression(void)
{
	char *ptr, *buf;
	struct statvfs sf;
	struct stat st;
	size_t block_size;
	off_t size;
	int fd, fd2;

	subtest = 1;

	if (statvfs(".", &sf) != 0) e(0);
	block_size = sf.f_bsize;

	if ((buf = malloc(block_size * 2)) == NULL) e(0);

	/*
	 * We first need a file that is just large enough that it requires the
	 * allocation of a metadata block - an indirect block - when more data
	 * is written to it. This is fileA. We keep it open throughout the
	 * test so we can unlink it immediately.
	 */
	if ((fd = open("fileA", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileA") != 0) e(0);

	/*
	 * Write to fileA until its next block requires the allocation of an
	 * additional metadata block - an indirect block.
	 */
	size = 0;
	memset(buf, 'A', block_size);
	do {
		/*
		 * Repeatedly write an extra block, until the file consists of
		 * more blocks than just the file data.
		 */
		if (write(fd, buf, block_size) != block_size) e(0);
		size += block_size;
		if (size >= block_size * 64) {
			/*
			 * It doesn't look like this is going to work.
			 * Skip this subtest altogether.
			 */
			if (close(fd) != 0) e(0);
			free(buf);

			return;
		}
		if (fstat(fd, &st) != 0) e(0);
	} while (st.st_blocks * 512 == size);

	/* Once we get there, go one step back by truncating by one block. */
	size -= block_size; /* for MFS, size will end up being 7*block_size */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Create another file, fileB, and write two blocks to it. FileB's
	 * blocks are going to end up in the secondary VM cache, associated to
	 * fileB's inode number (and two different offsets within the file).
	 * The block cache does not know about files getting deleted, so we can
	 * unlink fileB immediately after creating it. So far so good.
	 */
	if ((fd2 = open("fileB", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileB") != 0) e(0);
	memset(buf, 'B', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Write one extra block to fileA, hoping that this causes allocation
	 * of a metadata block as well. This is why we tried to get fileA to
	 * the point that one more block would also require the allocation of a
	 * metadata block. Our intent is to recycle the blocks that we just
	 * allocated and freed for fileB. As of writing, for the metadata
	 * block, this will *not* break the association with fileB's inode,
	 * which by itself is not a problem, yet crucial to reproducing
	 * the actual problem a bit later. Note that the test does not rely on
	 * whether the file system allocates the data block or the metadata
	 * block first, although it does need reverse deallocation (see below).
	 */
	memset(buf, 'A', block_size);
	if (write(fd, buf, block_size) != block_size) e(0);

	/*
	 * Create a new file, fileC, which recycles the inode number of fileB,
	 * but uses two new blocks to store its data. These new blocks will
	 * get associated to the fileB inode number, and one of them will
	 * thereby eclipse (but not remove) the association of fileA's metadata
	 * block to the inode of fileB.
	 */
	if ((fd2 = open("fileC", O_CREAT | O_TRUNC | O_WRONLY, 0600)) == -1)
		e(0);
	if (unlink("fileC") != 0) e(0);
	memset(buf, 'C', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);
	if (close(fd2) != 0) e(0);

	/*
	 * Free up the extra fileA blocks for reallocation, in particular
	 * including the metadata block. Again, this will not affect the
	 * contents of the VM cache in any way. FileA's metadata block remains
	 * cached in VM, with the inode association for fileB's block.
	 */
	if (ftruncate(fd, size) != 0) e(0);

	/*
	 * Now create yet one more file, fileD, which also recycles the inode
	 * number of fileB and fileC. Write two blocks to it; these blocks
	 * should recycle the blocks we just freed. One of these is fileA's
	 * just-freed metadata block, for which the new inode association will
	 * be equal to the inode association it had already (as long as blocks
	 * are freed in reverse order of their allocation, which happens to be
	 * the case for MFS). As a result, the block is not updated in the VM
	 * cache, and VM will therefore continue to see the inode association
	 * for the corresponding block of fileC which is still in the VM cache.
	 */
	if ((fd2 = open("fileD", O_CREAT | O_TRUNC | O_RDWR, 0600)) == -1)
		e(0);
	memset(buf, 'D', block_size * 2);
	if (write(fd2, buf, block_size * 2) != block_size * 2) e(0);

	ptr = mmap(NULL, block_size * 2, PROT_READ, MAP_FILE, fd2, 0);
	if (ptr == MAP_FAILED) e(0);

	/*
	 * Finally, we can test the issue. Since fileC's block is still the
	 * block for which VM has the corresponding inode association, a buggy
	 * VM will now find and map in fileC's block, instead of fileD's block.
	 * The result would be a memory-mapped area with stale contents,
	 * different from those of the underlying file.
	 */
	if (memcmp(buf, ptr, block_size * 2)) e(0);

	/* Clean up. */
	if (munmap(ptr, block_size * 2) != 0) e(0);

	if (close(fd2) != 0) e(0);
	if (unlink("fileD") != 0) e(0);

	if (close(fd) != 0) e(0);

	free(buf);
}

/*
 * Test mmap on file holes. Holes are a tricky case with the current VM
 * implementation. There are two main issues. First, whenever a file data
 * block is freed, VM has to know about this, or it will later blindly map in
 * the old data. Thus, file systems explicitly tell VM (through libminixfs)
 * whenever a block is freed, upon which the VM cache forgets the block.
 * Second, blocks are accessed primarily by a <dev,dev_off> pair and only
 * additionally by a <ino,ino_off> pair. Holes have no meaningful value for
 * the first pair, but do need to be registered in VM with the second pair, or
 * accessing them will generate a segmentation fault. Thus, file systems
 * explicitly tell VM (through libminixfs) when a hole is being peeked;
 * libminixfs currently fakes a device offset to make this work.
 */
static void
hole_regression(void)
{
	struct statvfs st;
	size_t block_size;
	char *buf;
	int fd;

	if (statvfs(".", &st) < 0) e(1);

	block_size = st.f_bsize;

	if ((buf = malloc(block_size)) == NULL) e(2);

	if ((fd = open("testfile", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(3);

	if (unlink("testfile") != 0) e(4);

	/*
	 * We perform the test twice, in a not-so-perfect attempt to test the
	 * two aspects independently. The first part immediately creates a
	 * hole, and is supposed to fail only if reporting holes to VM does not
	 * work. However, it may also fail if a page for a previous file with
	 * the same inode number as "testfile" is still in the VM cache.
	 */
	memset(buf, 12, block_size);

	if (write(fd, buf, block_size) != block_size) e(5);

	if (lseek(fd, block_size * 2, SEEK_CUR) != block_size * 3) e(6);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(7);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(8);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(9);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(10);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(11);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(12);

	if (munmap(buf, 4 * block_size) != 0) e(13);

	/*
	 * The second part first creates file content and only then turns part
	 * of it into a file hole, thus ensuring that VM has previously cached
	 * pages for the blocks that are freed. The test will fail if VM keeps
	 * the pages around in its cache.
	 */
	if ((buf = malloc(block_size)) == NULL) e(14);

	if (lseek(fd, block_size, SEEK_SET) != block_size) e(15);

	memset(buf, 34, block_size);

	if (write(fd, buf, block_size) != block_size) e(16);

	memset(buf, 56, block_size);

	if (write(fd, buf, block_size) != block_size) e(17);

	if (ftruncate(fd, block_size) != 0) e(18);

	if (lseek(fd, block_size * 3, SEEK_SET) != block_size * 3) e(19);

	memset(buf, 78, block_size);

	if (write(fd, buf, block_size) != block_size) e(20);

	free(buf);

	if ((buf = mmap(NULL, 4 * block_size, PROT_READ, MAP_SHARED | MAP_FILE,
	    fd, 0)) == MAP_FAILED) e(21);

	if (buf[0 * block_size] != 12 || buf[1 * block_size - 1] != 12) e(22);
	if (buf[1 * block_size] != 0 || buf[2 * block_size - 1] != 0) e(23);
	if (buf[2 * block_size] != 0 || buf[3 * block_size - 1] != 0) e(24);
	if (buf[3 * block_size] != 78 || buf[4 * block_size - 1] != 78) e(25);

	if (munmap(buf, 4 * block_size) != 0) e(26);

	close(fd);
}
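
/*
 * main() first runs the targeted regression subtests above, then drives the
 * generic cache test (testcache) with a range of working set and block sizes.
 */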

int
main(int argc, char *argv[])
{
	int i, iter = 2;

	start(74);

	basic_regression();

	nonedev_regression();

	/*
	 * Any inode or block allocation happening concurrently with this
	 * subtest will make the subtest succeed without testing the actual
	 * issue. Thus, repeat the subtest a fair number of times.
	 */
	for (i = 0; i < 10; i++)
		corruption_regression();

	hole_regression();

	test_memory_types_vs_operations();

	makefiles(MAXFILES);

	cachequiet(!bigflag);
	if(bigflag) iter = 3;

	/* Try various combinations of working set sizes and block sizes in
	 * order to specifically target the primary cache, then the
	 * primary+secondary caches, then the primary+secondary caches plus
	 * secondary-cache eviction.
	 */

	if(dotest(PAGE_SIZE, 100, iter)) e(5);
	if(dotest(PAGE_SIZE*2, 100, iter)) e(2);
	if(dotest(PAGE_SIZE*3, 100, iter)) e(3);
	if(dotest(PAGE_SIZE, 20000, iter)) e(5);

	if(bigflag) {
		u32_t totalmem, freemem, cachedmem;
		if(dotest(PAGE_SIZE, 150000, iter)) e(5);
		getmem(&totalmem, &freemem, &cachedmem);
		if(dotest(PAGE_SIZE, totalmem*1.5, iter)) e(6);
	}

	quit();

	return 0;
}