minix/minix/tests/test85.c
David van Moolenbroek 6c46a77d95 libminixfs: better support for read errors and EOF
- The lmfs_get_block*(3) API calls may now return an error.  The idea
  is to encourage a next generation of file system services to do a
  better job at dealing with block read errors than the MFS-derived
  implementations do.  These existing file systems have been changed
  to panic immediately upon getting a block read error, rather than
  let unchecked errors cause corruption.  Note that libbdev already
  retries failing I/O operations a few times first.  (A sketch of the
  new error check follows below.)

- The libminixfs block device I/O module (bio.c) now deals properly
  with end-of-file conditions on block devices.  Since a device or
  partition size may not be a multiple of the root file system's block
  size, support for partial block retrieval has been added, with a new
  internal lmfs_get_partial_block(3) call.  A new test program,
  test85, tests the new handling of EOF conditions when reading,
  writing, and memory-mapping a block device.

Change-Id: I05e35b6b8851488328a2679da635ebba0c6d08ce
2015-08-14 18:39:26 +00:00
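
A minimal sketch of the first point, assuming a post-change calling
convention in which lmfs_get_block(3) fills in a buffer pointer and
returns OK or a negative error code; the wrapper name
get_block_or_panic() is hypothetical and not part of libminixfs:

	/*
	 * Sketch only: panic on a block read error, as the MFS-derived
	 * file systems now do, rather than let an unchecked error corrupt
	 * the on-disk state.
	 */
	static struct buf *
	get_block_or_panic(dev_t dev, block_t block, int how)
	{
		struct buf *bp;
		int r;

		if ((r = lmfs_get_block(&bp, dev, block, how)) != OK)
			panic("error getting block %u: %d", block, r);

		return bp;
	}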

/* Test for end-of-file during block device I/O - by D.C. van Moolenbroek */
/* This test needs to be run as root; it sets up and uses a VND instance. */
/*
 * The test should work with all root file system block sizes, but only tests
 * certain corner cases if the root FS block size is twice the page size.
 */
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/statvfs.h>
#include <minix/partition.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <assert.h>

#define VNCONFIG "/usr/sbin/vnconfig"

#define SECTOR_SIZE 512 /* this should be the sector size of VND */

#define ITERATIONS 3
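
/*
 * Ways in which a transfer may be positioned relative to the device's end
 * (EOF): entirely before it, ending exactly at it, starting before it and
 * ending past it, ending exactly one byte past it, starting exactly at it,
 * or starting entirely past it.
 */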
enum {
	BEFORE_EOF,
	UPTO_EOF,
	ACROSS_EOF,
	ONEPAST_EOF,
	FROM_EOF,
	BEYOND_EOF
};

#include "common.h"

static int need_cleanup = 0;	/* set once a VND has been configured */

static int dev_fd;		/* file descriptor for the block device */
static size_t dev_size;		/* size of the block device, in bytes */
static char *dev_buf;		/* buffer for I/O transfers */
static char *dev_ref;		/* reference copy of the device contents */

static size_t block_size;	/* root file system block size */
static size_t page_size;	/* system page size */
static int test_peek;		/* test memory-mapped I/O (peek) as well? */

static char *mmap_ptr = NULL;	/* active memory mapping, if any */
static size_t mmap_size;	/* size of the active mapping */

static int pipe_fd[2];		/* pipe used by is_readable() */

/*
 * Fill the given buffer with random contents.
 */
static void
fill_buf(char * buf, size_t size)
{

	while (size--)
		*buf++ = lrand48() & 0xff;
}

/*
 * Place the elements of the source array in the destination array in random
 * order. There are probably better ways to do this, but it is morning, and I
 * haven't had coffee yet, so go away.
 */
static void
scramble(int * dst, const int * src, int count)
{
	int i, j, k;

	for (i = 0; i < count; i++)
		dst[i] = i;

	for (i = count - 1; i >= 0; i--) {
		j = lrand48() % (i + 1);

		k = dst[j];
		dst[j] = dst[i];
		dst[i] = src[k];
	}
}

/*
 * Perform I/O using read(2) and check the returned results against the
 * expected result and the image reference data.
 */
static void
io_read(size_t pos, size_t len, size_t expected)
{
	ssize_t bytes;

	assert(len > 0 && len <= dev_size);
	assert(expected <= len);

	if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

	memset(dev_buf, 0, len);

	if ((bytes = read(dev_fd, dev_buf, len)) < 0) e(0);

	if (bytes != expected) e(0);

	if (memcmp(&dev_ref[pos], dev_buf, bytes)) e(0);
}

/*
 * Perform I/O using write(2) and check the returned result against the
 * expected result. Update the image reference data as appropriate.
 */
static void
io_write(size_t pos, size_t len, size_t expected)
{
	ssize_t bytes;

	assert(len > 0 && len <= dev_size);
	assert(expected <= len);

	if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

	fill_buf(dev_buf, len);

	if ((bytes = write(dev_fd, dev_buf, len)) < 0) e(0);

	if (bytes != expected) e(0);

	if (bytes > 0) {
		assert(pos + bytes <= dev_size);

		memcpy(&dev_ref[pos], dev_buf, bytes);
	}
}

/*
 * Test if reading from the given pointer succeeds or not, and return the
 * result.
 */
static int
is_readable(char * ptr)
{
	ssize_t r;
	char byte;

	/*
	 * If we access the pointer directly, we will get a fatal signal.
	 * Thus, for that to work we would need a child process, making the
	 * whole test slow and noisy. Let a service try the operation
	 * instead: if the service handling the pipe cannot copy in the
	 * byte, the write fails with EFAULT rather than killing us.
	 */
	r = write(pipe_fd[1], ptr, 1);

	if (r == 1) {
		/* Don't fill up the pipe. */
		if (read(pipe_fd[0], &byte, 1) != 1) e(0);

		return 1;
	} else if (r != -1 || errno != EFAULT)
		e(0);

	return 0;
}

/*
 * Perform I/O using mmap(2) and check the returned results against the
 * expected result and the image reference data. Ensure that bytes beyond
 * the device end are either zero (on the remainder of the last page) or
 * inaccessible on pages entirely beyond the device end.
 */
static void
io_peek(size_t pos, size_t len, size_t expected)
{
	size_t n, delta, mapped_size;
	char *ptr;

	assert(test_peek);

	delta = pos % page_size;

	pos -= delta;
	len += delta;

	len = roundup(len, page_size);
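
	/*
	 * Worked example of the alignment above, with page_size = 4096: a
	 * call with pos = 5000 and len = 100 is widened to pos = 4096 and
	 * len = 4096, so that the mapping starts and ends on page
	 * boundaries.
	 */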

	/* Don't bother with the given expected value. Recompute it. */
	if (pos < dev_size)
		expected = MIN(dev_size - pos, len);
	else
		expected = 0;

	mapped_size = roundup(dev_size, page_size);

	assert(!(len % page_size));

	ptr = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_FILE, dev_fd,
	    (off_t)pos);

	/*
	 * As of writing, VM allows memory mapping at any offset and for any
	 * length. At least for block devices, VM should probably be changed
	 * to throw ENXIO for any pages beyond the file end, which in turn
	 * renders all the SIGBUS tests below obsolete.
	 */
	if (ptr == MAP_FAILED) {
		if (pos + len <= mapped_size) e(0);
		if (errno != ENXIO) e(0);

		return;
	}

	mmap_ptr = ptr;
	mmap_size = len;

	/*
	 * Any page that contains any valid part of the mapped device should
	 * be readable and have correct contents for that part. If the last
	 * valid page extends beyond the mapped device, its remainder should
	 * be zero.
	 */
	if (pos < dev_size) {
		/* The valid part should have the expected device contents. */
		if (memcmp(&dev_ref[pos], ptr, expected)) e(0);

		/* The remainder, if any, should be zero. */
		for (n = expected; n % page_size; n++)
			if (ptr[n] != 0) e(0);
	}

	/*
	 * Any page entirely beyond EOF should not be mapped in. In order to
	 * ensure that is_readable() works, also test pages that are mapped
	 * in.
	 */
	for (n = pos; n < pos + len; n += page_size)
		if (is_readable(&ptr[n - pos]) != (n < mapped_size)) e(0);

	munmap(ptr, len);

	mmap_ptr = NULL;
}

/*
 * Perform one of the supported end-of-file access attempts using one I/O
 * operation.
 */
static void
do_one_io(int where, void (* io_proc)(size_t, size_t, size_t))
{
	size_t start, bytes;

	switch (where) {
	case BEFORE_EOF:
		bytes = lrand48() % (dev_size - 1) + 1;

		io_proc(dev_size - bytes - 1, bytes, bytes);

		break;

	case UPTO_EOF:
		bytes = lrand48() % dev_size + 1;

		io_proc(dev_size - bytes, bytes, bytes);

		break;

	case ACROSS_EOF:
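		/*
		 * Worked example, with hypothetical numbers: for dev_size =
		 * 4096 and start = 1000, the initial length is 4096 - 1000 +
		 * 1 = 3097 bytes, thus ending one byte past EOF; it may then
		 * grow by up to dev_size - bytes extra bytes, while the
		 * expected result remains dev_size - start = 3096 bytes.
		 */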
		start = lrand48() % (dev_size - 1) + 1;
		bytes = dev_size - start + 1;
		assert(start < dev_size && start + bytes > dev_size);
		bytes += lrand48() % (dev_size - bytes + 1);

		io_proc(start, bytes, dev_size - start);

		break;

	case ONEPAST_EOF:
		bytes = lrand48() % (dev_size - 1) + 1;

		io_proc(dev_size - bytes + 1, bytes, bytes - 1);

		break;

	case FROM_EOF:
		bytes = lrand48() % dev_size + 1;

		io_proc(dev_size, bytes, 0);

		break;

	case BEYOND_EOF:
		start = dev_size + lrand48() % dev_size + 1;
		bytes = lrand48() % dev_size + 1;

		io_proc(start, bytes, 0);

		break;

	default:
		assert(0);
	}
}

/*
 * Perform I/O operations, testing all the supported end-of-file access
 * attempts in a random order so as to detect possible problems with caching.
 */
static void
do_io(void (* io_proc)(size_t, size_t, size_t))
{
	static const int list[] = { BEFORE_EOF, UPTO_EOF, ACROSS_EOF,
	    ONEPAST_EOF, FROM_EOF, BEYOND_EOF };
	static const int count = sizeof(list) / sizeof(list[0]);
	int i, where[count];

	scramble(where, list, count);

	for (i = 0; i < count; i++)
		do_one_io(where[i], io_proc);
}

/*
 * Set up an image file of the given size, assign it to a VND, and open the
 * resulting block device. The size is size_t because we keep a reference
 * copy of its entire contents in memory.
 */
static void
setup_image(size_t size)
{
	struct part_geom part;
	size_t off;
	ssize_t bytes;
	int fd, status;

	dev_size = size;
	if ((dev_buf = malloc(dev_size)) == NULL) e(0);
	if ((dev_ref = malloc(dev_size)) == NULL) e(0);

	if ((fd = open("image", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(0);

	fill_buf(dev_ref, dev_size);

	for (off = 0; off < dev_size; off += bytes) {
		bytes = write(fd, &dev_ref[off], dev_size - off);

		if (bytes <= 0) e(0);
	}

	close(fd);

	status = system(VNCONFIG " vnd0 image 2>/dev/null");
	if (!WIFEXITED(status)) e(0);
	if (WEXITSTATUS(status) != 0) {
		printf("skipped\n"); /* most likely cause: vnd0 is in use */

		cleanup();
		exit(0);
	}

	need_cleanup = 1;

	if ((dev_fd = open("/dev/vnd0", O_RDWR)) < 0) e(0);

	if (ioctl(dev_fd, DIOCGETP, &part) < 0) e(0);
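
	/* The VND must expose exactly the size of the backing image file. */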
	if (part.size != dev_size) e(0);
}

/*
 * Clean up the VND we set up previously. This function is also called in
 * case of an unexpected exit.
 */
static void
cleanup_device(void)
{
	int status;

	if (!need_cleanup)
		return;

	if (mmap_ptr != NULL) {
		munmap(mmap_ptr, mmap_size);

		mmap_ptr = NULL;
	}

	if (dev_fd >= 0)
		close(dev_fd);

	status = system(VNCONFIG " -u vnd0 2>/dev/null");
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);

	need_cleanup = 0;
}

/*
 * Signal handler for exceptions.
 */
static void
got_signal(int __unused sig)
{

	cleanup_device();

	exit(1);
}

/*
 * Clean up the VND and image file we set up previously.
 */
static void
cleanup_image(void)
{
	size_t off;
	ssize_t bytes;
	int fd;

	cleanup_device();

	if ((fd = open("image", O_RDONLY, 0644)) < 0) e(0);

	for (off = 0; off < dev_size; off += bytes) {
		bytes = read(fd, &dev_buf[off], dev_size - off);

		if (bytes <= 0) e(0);
	}

	close(fd);

	/* Have all changes been written back to the device? */
	if (memcmp(dev_buf, dev_ref, dev_size)) e(0);

	unlink("image");

	free(dev_buf);
	free(dev_ref);
}

/*
 * Run the full test for a block device with the given size.
 */
static void
do_test(size_t size)
{
	int i;

	/*
	 * Using the three I/O primitives (read, write, peek), we run four
	 * sequences, mainly to test the effects of blocks being cached or
	 * not. We set up a new image for each sequence, because, if
	 * everything goes right, closing the device file also clears all
	 * cached blocks for it, in both the root file system's cache and
	 * the VM cache. Note that we currently do not even attempt to push
	 * the blocks out of the root FS' cache in order to test retrieval
	 * from the VM cache, since this would involve doing a LOT of extra
	 * I/O.
	 */
	for (i = 0; i < 4; i++) {
		setup_image(size);

		switch (i) {
		case 0:
			do_io(io_read);

			/* FALLTHROUGH */
		case 1:
			do_io(io_write);
			do_io(io_read);

			break;

		case 2:
			do_io(io_peek);

			/* FALLTHROUGH */
		case 3:
			do_io(io_write);
			do_io(io_peek);

			break;
		}

		cleanup_image();
	}
}

/*
 * Test program for end-of-file conditions during block device I/O.
 */
int
main(void)
{
	static const unsigned int blocks[] = { 1, 4, 3, 5, 2 };
	struct statvfs buf;
	int i, j;

	start(85);

	signal(SIGINT, got_signal);
	signal(SIGABRT, got_signal);
	signal(SIGSEGV, got_signal);
	signal(SIGBUS, got_signal);

	atexit(cleanup_device);

	srand48(time(NULL));
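
	/* This pipe lets is_readable() probe page readability via write(2). */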
	if (pipe(pipe_fd) != 0) e(0);

	/*
	 * Get the system page size, and align all memory mapping offsets and
	 * sizes accordingly.
	 */
	page_size = sysconf(_SC_PAGESIZE);

	/*
	 * Get the root file system block size. In the current MINIX3 system
	 * architecture, the root file system's block size determines the
	 * transfer granularity for I/O on unmounted block devices. If this
	 * block size is not a multiple of the page size, we are (currently!)
	 * not expecting memory-mapped block devices to work.
	 */
	if (statvfs("/", &buf) < 0) e(0);
	block_size = buf.f_bsize;

	test_peek = !(block_size % page_size);

	for (i = 0; i < ITERATIONS; i++) {
		/*
		 * The 'blocks' array is scrambled so as to detect any blocks
		 * left in the VM cache (or not) across runs, just in case.
		 */
		for (j = 0; j < sizeof(blocks) / sizeof(blocks[0]); j++) {
			do_test(blocks[j] * block_size + SECTOR_SIZE);
			do_test(blocks[j] * block_size);
			do_test(blocks[j] * block_size - SECTOR_SIZE);
		}
	}

	quit();
}