6c46a77d95
- The lmfs_get_block*(3) API calls may now return an error. The idea is to encourage a next generation of file system services to do a better job at dealing with block read errors than the MFS-derived implementations do. These existing file systems have been changed to panic immediately upon getting a block read error, in order not to let unchecked errors cause corruption. Note that libbdev already retries failing I/O operations a few times first. - The libminixfs block device I/O module (bio.c) now deals properly with end-of-file conditions on block devices. Since a device or partition size may not be a multiple of the root file system's block size, support for partial block retrieval has been added, with a new internal lmfs_get_partial_block(3) call. A new test program, test85, tests the new handling of EOF conditions when reading, writing, and memory-mapping a block device. Change-Id: I05e35b6b8851488328a2679da635ebba0c6d08ce
541 lines
11 KiB
C
541 lines
11 KiB
C
/* Test for end-of-file during block device I/O - by D.C. van Moolenbroek */
/* This test needs to be run as root; it sets up and uses a VND instance. */
/*
 * The test should work with all root file system block sizes, but only tests
 * certain corner cases if the root FS block size is twice the page size.
 */
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <signal.h>
|
|
#include <sys/param.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/ioctl.h>
|
|
#include <minix/partition.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <assert.h>
|
|
|
|
/* Utility used to attach the image file to, and detach it from, vnd0. */
#define VNCONFIG	"/usr/sbin/vnconfig"

#define SECTOR_SIZE	512	/* this should be the sector size of VND */

/* Number of times main() runs through the whole list of device sizes. */
#define ITERATIONS	3

/*
 * The kinds of access, relative to the end of the device, that are tried by
 * do_one_io().
 */
enum {
	BEFORE_EOF,	/* request ends one byte before the device end */
	UPTO_EOF,	/* request ends exactly at the device end */
	ACROSS_EOF,	/* request starts before and ends past the end */
	ONEPAST_EOF,	/* request ends exactly one byte past the end */
	FROM_EOF,	/* request starts exactly at the device end */
	BEYOND_EOF	/* request starts past the device end */
};

#include "common.h"
|
|
|
|
/* Nonzero while vnd0 is configured by us and must be torn down again. */
static int need_cleanup = 0;

/*
 * The block device under test.  The descriptor starts out as -1 rather than
 * 0, because cleanup_device() closes any descriptor that is >= 0.
 */
static int dev_fd = -1;
static size_t dev_size;		/* size of the device (and image) in bytes */
static char *dev_buf;		/* scratch buffer of dev_size bytes */
static char *dev_ref;		/* expected device contents, dev_size bytes */

static size_t block_size;	/* root file system block size */
static size_t page_size;	/* system page size */
static int test_peek;		/* nonzero if memory mapping is to be tested */

/* The mapping currently held by io_peek(), if any, for cleanup purposes. */
static char *mmap_ptr = NULL;
static size_t mmap_size;

/* Pipe used by is_readable() to probe memory without faulting ourselves. */
static int pipe_fd[2];

/*
 * Fill the given buffer with random contents.  Exactly 'size' bytes starting
 * at 'buf' are overwritten, each with the low eight bits of an lrand48(3)
 * result.  A size of zero leaves the buffer untouched.
 */
static void
fill_buf(char * buf, size_t size)
{

	while (size--)
		*buf++ = lrand48() & 0xff;
}

/*
 * Place the elements of the source array in the destination array in random
 * order.  Both arrays hold 'count' elements.  The destination first receives
 * the indices 0..count-1; these are then shuffled from the back to the front,
 * and each slot that reaches its final position is translated from an index
 * into the corresponding source element.  Slots below 'i' therefore always
 * still hold indices, which is what makes the src[k] lookup valid.
 */
static void
scramble(int * dst, const int * src, int count)
{
	int i, j, k;

	for (i = 0; i < count; i++)
		dst[i] = i;

	for (i = count - 1; i >= 0; i--) {
		/* Pick one of the not yet finalized slots 0..i. */
		j = lrand48() % (i + 1);

		k = dst[j];
		dst[j] = dst[i];
		dst[i] = src[k];
	}
}

/*
|
|
* Perform I/O using read(2) and check the returned results against the
|
|
* expected result and the image reference data.
|
|
*/
|
|
static void
|
|
io_read(size_t pos, size_t len, size_t expected)
|
|
{
|
|
ssize_t bytes;
|
|
|
|
assert(len > 0 && len <= dev_size);
|
|
assert(expected <= len);
|
|
|
|
if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);
|
|
|
|
memset(dev_buf, 0, len);
|
|
|
|
if ((bytes = read(dev_fd, dev_buf, len)) < 0) e(0);
|
|
|
|
if (bytes != expected) e(0);
|
|
|
|
if (memcmp(&dev_ref[pos], dev_buf, bytes)) e(0);
|
|
}
|
|
|
|
/*
|
|
* Perform I/O using write(2) and check the returned result against the
|
|
* expected result. Update the image reference data as appropriate.
|
|
*/
|
|
static void
|
|
io_write(size_t pos, size_t len, size_t expected)
|
|
{
|
|
ssize_t bytes;
|
|
|
|
assert(len > 0 && len <= dev_size);
|
|
assert(expected <= len);
|
|
|
|
if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);
|
|
|
|
fill_buf(dev_buf, len);
|
|
|
|
if ((bytes = write(dev_fd, dev_buf, len)) < 0) e(0);
|
|
|
|
if (bytes != expected) e(0);
|
|
|
|
if (bytes > 0) {
|
|
assert(pos + bytes <= dev_size);
|
|
|
|
memcpy(&dev_ref[pos], dev_buf, bytes);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Test if reading from the given pointer succeeds or not, and return the
|
|
* result.
|
|
*/
|
|
static int
|
|
is_readable(char * ptr)
|
|
{
|
|
ssize_t r;
|
|
char byte;
|
|
|
|
/*
|
|
* If we access the pointer directly, we will get a fatal signal.
|
|
* Thus, for that to work we would need a child process, making the
|
|
* whole test slow and noisy. Let a service try the operation instead.
|
|
*/
|
|
r = write(pipe_fd[1], ptr, 1);
|
|
|
|
if (r == 1) {
|
|
/* Don't fill up the pipe. */
|
|
if (read(pipe_fd[0], &byte, 1) != 1) e(0);
|
|
|
|
return 1;
|
|
} else if (r != -1 || errno != EFAULT)
|
|
e(0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Perform I/O using mmap(2) and check the returned results against the
|
|
* expected result and the image reference data. Ensure that bytes beyond the
|
|
* device end are either zero (on the remainder of the last page) or
|
|
* inaccessible on pages entirely beyond the device end.
|
|
*/
|
|
static void
|
|
io_peek(size_t pos, size_t len, size_t expected)
|
|
{
|
|
size_t n, delta, mapped_size;
|
|
char *ptr;
|
|
|
|
assert(test_peek);
|
|
|
|
delta = pos % page_size;
|
|
|
|
pos -= delta;
|
|
len += delta;
|
|
|
|
len = roundup(len, page_size);
|
|
|
|
/* Don't bother with the given expected value. Recompute it. */
|
|
if (pos < dev_size)
|
|
expected = MIN(dev_size - pos, len);
|
|
else
|
|
expected = 0;
|
|
|
|
mapped_size = roundup(dev_size, page_size);
|
|
|
|
assert(!(len % page_size));
|
|
|
|
ptr = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_FILE, dev_fd,
|
|
(off_t)pos);
|
|
|
|
/*
|
|
* As of writing, VM allows memory mapping at any offset and for any
|
|
* length. At least for block devices, VM should probably be changed
|
|
* to throw ENXIO for any pages beyond the file end, which in turn
|
|
* renders all the SIGBUS tests below obsolete.
|
|
*/
|
|
if (ptr == MAP_FAILED) {
|
|
if (pos + len <= mapped_size) e(0);
|
|
if (errno != ENXIO) e(0);
|
|
|
|
return;
|
|
}
|
|
|
|
mmap_ptr = ptr;
|
|
mmap_size = len;
|
|
|
|
/*
|
|
* Any page that contains any valid part of the mapped device should be
|
|
* readable and have correct contents for that part. If the last valid
|
|
* page extends beyond the mapped device, its remainder should be zero.
|
|
*/
|
|
if (pos < dev_size) {
|
|
/* The valid part should have the expected device contents. */
|
|
if (memcmp(&dev_ref[pos], ptr, expected)) e(0);
|
|
|
|
/* The remainder, if any, should be zero. */
|
|
for (n = expected; n % page_size; n++)
|
|
if (ptr[n] != 0) e(0);
|
|
}
|
|
|
|
/*
|
|
* Any page entirely beyond EOF should not be mapped in. In order to
|
|
* ensure that is_readable() works, also test pages that are mapped in.
|
|
*/
|
|
for (n = pos; n < pos + len; n += page_size)
|
|
if (is_readable(&ptr[n - pos]) != (n < mapped_size)) e(0);
|
|
|
|
munmap(ptr, len);
|
|
|
|
mmap_ptr = NULL;
|
|
}
|
|
|
|
/*
|
|
* Perform one of the supported end-of-file access attempts using one I/O
|
|
* operation.
|
|
*/
|
|
static void
|
|
do_one_io(int where, void (* io_proc)(size_t, size_t, size_t))
|
|
{
|
|
size_t start, bytes;
|
|
|
|
switch (where) {
|
|
case BEFORE_EOF:
|
|
bytes = lrand48() % (dev_size - 1) + 1;
|
|
|
|
io_proc(dev_size - bytes - 1, bytes, bytes);
|
|
|
|
break;
|
|
|
|
case UPTO_EOF:
|
|
bytes = lrand48() % dev_size + 1;
|
|
|
|
io_proc(dev_size - bytes, bytes, bytes);
|
|
|
|
break;
|
|
|
|
case ACROSS_EOF:
|
|
start = lrand48() % (dev_size - 1) + 1;
|
|
bytes = dev_size - start + 1;
|
|
assert(start < dev_size && start + bytes > dev_size);
|
|
bytes += lrand48() % (dev_size - bytes + 1);
|
|
|
|
io_proc(start, bytes, dev_size - start);
|
|
|
|
break;
|
|
|
|
case ONEPAST_EOF:
|
|
bytes = lrand48() % (dev_size - 1) + 1;
|
|
|
|
io_proc(dev_size - bytes + 1, bytes, bytes - 1);
|
|
|
|
break;
|
|
|
|
case FROM_EOF:
|
|
bytes = lrand48() % dev_size + 1;
|
|
|
|
io_proc(dev_size, bytes, 0);
|
|
|
|
break;
|
|
|
|
case BEYOND_EOF:
|
|
start = dev_size + lrand48() % dev_size + 1;
|
|
bytes = lrand48() % dev_size + 1;
|
|
|
|
io_proc(start, bytes, 0);
|
|
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Perform I/O operations, testing all the supported end-of-file access
|
|
* attempts in a random order so as to detect possible problems with caching.
|
|
*/
|
|
static void
|
|
do_io(void (* io_proc)(size_t, size_t, size_t))
|
|
{
|
|
static const int list[] = { BEFORE_EOF, UPTO_EOF, ACROSS_EOF,
|
|
ONEPAST_EOF, FROM_EOF, BEYOND_EOF };
|
|
static const int count = sizeof(list) / sizeof(list[0]);
|
|
int i, where[count];
|
|
|
|
scramble(where, list, count);
|
|
|
|
for (i = 0; i < count; i++)
|
|
do_one_io(where[i], io_proc);
|
|
}
|
|
|
|
/*
|
|
* Set up an image file of the given size, assign it to a VND, and open the
|
|
* resulting block device. The size is size_t because we keep a reference copy
|
|
* of its entire contents in memory.
|
|
*/
|
|
static void
|
|
setup_image(size_t size)
|
|
{
|
|
struct part_geom part;
|
|
size_t off;
|
|
ssize_t bytes;
|
|
int fd, status;
|
|
|
|
dev_size = size;
|
|
if ((dev_buf = malloc(dev_size)) == NULL) e(0);
|
|
if ((dev_ref = malloc(dev_size)) == NULL) e(0);
|
|
|
|
if ((fd = open("image", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(0);
|
|
|
|
fill_buf(dev_ref, dev_size);
|
|
|
|
for (off = 0; off < dev_size; off += bytes) {
|
|
bytes = write(fd, &dev_ref[off], dev_size - off);
|
|
|
|
if (bytes <= 0) e(0);
|
|
}
|
|
|
|
close(fd);
|
|
|
|
status = system(VNCONFIG " vnd0 image 2>/dev/null");
|
|
if (!WIFEXITED(status)) e(0);
|
|
if (WEXITSTATUS(status) != 0) {
|
|
printf("skipped\n"); /* most likely cause: vnd0 is in use */
|
|
cleanup();
|
|
exit(0);
|
|
}
|
|
|
|
need_cleanup = 1;
|
|
|
|
if ((dev_fd = open("/dev/vnd0", O_RDWR)) < 0) e(0);
|
|
|
|
if (ioctl(dev_fd, DIOCGETP, &part) < 0) e(0);
|
|
|
|
if (part.size != dev_size) e(0);
|
|
}
|
|
|
|
/*
|
|
* Clean up the VND we set up previously. This function is also called in case
|
|
* of an unexpected exit.
|
|
*/
|
|
static void
|
|
cleanup_device(void)
|
|
{
|
|
int status;
|
|
|
|
if (!need_cleanup)
|
|
return;
|
|
|
|
if (mmap_ptr != NULL) {
|
|
munmap(mmap_ptr, mmap_size);
|
|
|
|
mmap_ptr = NULL;
|
|
}
|
|
|
|
if (dev_fd >= 0)
|
|
close(dev_fd);
|
|
|
|
status = system(VNCONFIG " -u vnd0 2>/dev/null");
|
|
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
|
|
|
|
need_cleanup = 0;
|
|
}
|
|
|
|
/*
|
|
* Signal handler for exceptions.
|
|
*/
|
|
static void
|
|
got_signal(int __unused sig)
|
|
{
|
|
|
|
cleanup_device();
|
|
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
* Clean up the VND and image file we set up previously.
|
|
*/
|
|
static void
|
|
cleanup_image(void)
|
|
{
|
|
size_t off;
|
|
ssize_t bytes;
|
|
int fd;
|
|
|
|
cleanup_device();
|
|
|
|
if ((fd = open("image", O_RDONLY, 0644)) < 0) e(0);
|
|
|
|
for (off = 0; off < dev_size; off += bytes) {
|
|
bytes = read(fd, &dev_buf[off], dev_size - off);
|
|
|
|
if (bytes <= 0) e(0);
|
|
}
|
|
|
|
close(fd);
|
|
|
|
/* Have all changes written back to the device? */
|
|
if (memcmp(dev_buf, dev_ref, dev_size)) e(0);
|
|
|
|
unlink("image");
|
|
|
|
free(dev_buf);
|
|
free(dev_ref);
|
|
}
|
|
|
|
/*
|
|
* Run the full test for a block device with the given size.
|
|
*/
|
|
static void
|
|
do_test(size_t size)
|
|
{
|
|
int i;
|
|
|
|
/*
|
|
* Using the three I/O primitives (read, write, peek), we run four
|
|
* sequences, mainly to test the effects of blocks being cached or not.
|
|
* We set up a new image for each sequence, because -if everything goes
|
|
* right- closing the device file also clears all cached blocks for it,
|
|
* in both the root file system's cache and the VM cache. Note that we
|
|
* currently do not even attempt to push the blocks out of the root FS'
|
|
* cache in order to test retrieval from the VM cache, since this would
|
|
* involve doing a LOT of extra I/O.
|
|
*/
|
|
for (i = 0; i < 4; i++) {
|
|
setup_image(size);
|
|
|
|
switch (i) {
|
|
case 0:
|
|
do_io(io_read);
|
|
|
|
/* FALLTHROUGH */
|
|
case 1:
|
|
do_io(io_write);
|
|
|
|
do_io(io_read);
|
|
|
|
break;
|
|
|
|
case 2:
|
|
do_io(io_peek);
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
case 3:
|
|
do_io(io_write);
|
|
|
|
do_io(io_peek);
|
|
|
|
break;
|
|
}
|
|
|
|
cleanup_image();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Test program for end-of-file conditions during block device I/O.
|
|
*/
|
|
int
|
|
main(void)
|
|
{
|
|
static const unsigned int blocks[] = { 1, 4, 3, 5, 2 };
|
|
struct statvfs buf;
|
|
int i, j;
|
|
|
|
start(85);
|
|
|
|
signal(SIGINT, got_signal);
|
|
signal(SIGABRT, got_signal);
|
|
signal(SIGSEGV, got_signal);
|
|
signal(SIGBUS, got_signal);
|
|
atexit(cleanup_device);
|
|
|
|
srand48(time(NULL));
|
|
|
|
if (pipe(pipe_fd) != 0) e(0);
|
|
|
|
/*
|
|
* Get the system page size, and align all memory mapping offsets and
|
|
* sizes accordingly.
|
|
*/
|
|
page_size = sysconf(_SC_PAGESIZE);
|
|
|
|
/*
|
|
* Get the root file system block size. In the current MINIX3 system
|
|
* architecture, the root file system's block size determines the
|
|
* transfer granularity for I/O on unmounted block devices. If this
|
|
* block size is not a multiple of the page size, we are (currently!)
|
|
* not expecting memory-mapped block devices to work.
|
|
*/
|
|
if (statvfs("/", &buf) < 0) e(0);
|
|
|
|
block_size = buf.f_bsize;
|
|
|
|
test_peek = !(block_size % page_size);
|
|
|
|
for (i = 0; i < ITERATIONS; i++) {
|
|
/*
|
|
* The 'blocks' array is scrambled so as to detect any blocks
|
|
* left in the VM cache (or not) across runs, just in case.
|
|
*/
|
|
for (j = 0; j < sizeof(blocks) / sizeof(blocks[0]); j++) {
|
|
do_test(blocks[j] * block_size + SECTOR_SIZE);
|
|
|
|
do_test(blocks[j] * block_size);
|
|
|
|
do_test(blocks[j] * block_size - SECTOR_SIZE);
|
|
}
|
|
}
|
|
|
|
quit();
|
|
}
|