libbdev: extended version
This version of libbdev supports asynchronous communication, recovery after driver restarts, and retrying of failed transfer operations.
This commit is contained in:
parent
9221586f37
commit
ed007ca416
11 changed files with 989 additions and 55 deletions
|
@ -1,8 +1,10 @@
|
||||||
#ifndef __MINIX_BDEV_H
|
#ifndef _MINIX_BDEV_H
|
||||||
#define __MINIX_BDEV_H
|
#define _MINIX_BDEV_H
|
||||||
|
|
||||||
|
/* Common API. */
|
||||||
extern void bdev_driver(dev_t dev, char *label);
|
extern void bdev_driver(dev_t dev, char *label);
|
||||||
|
|
||||||
|
/* Synchronous API. */
|
||||||
extern int bdev_open(dev_t dev, int access);
|
extern int bdev_open(dev_t dev, int access);
|
||||||
extern int bdev_close(dev_t dev);
|
extern int bdev_close(dev_t dev);
|
||||||
|
|
||||||
|
@ -16,4 +18,28 @@ extern ssize_t bdev_scatter(dev_t dev, u64_t pos, iovec_t *vec, int count,
|
||||||
int flags);
|
int flags);
|
||||||
extern int bdev_ioctl(dev_t dev, int request, void *buf);
|
extern int bdev_ioctl(dev_t dev, int request, void *buf);
|
||||||
|
|
||||||
#endif /* __MINIX_BDEV_H */
|
/* Asynchronous API. */
|
||||||
|
typedef int bdev_id_t;
|
||||||
|
typedef void *bdev_param_t;
|
||||||
|
|
||||||
|
typedef void (*bdev_callback_t)(dev_t dev, bdev_id_t id, bdev_param_t param,
|
||||||
|
int result);
|
||||||
|
|
||||||
|
extern void bdev_flush_asyn(dev_t dev);
|
||||||
|
|
||||||
|
extern bdev_id_t bdev_read_asyn(dev_t dev, u64_t pos, char *buf, size_t count,
|
||||||
|
int flags, bdev_callback_t callback, bdev_param_t param);
|
||||||
|
extern bdev_id_t bdev_write_asyn(dev_t dev, u64_t pos, char *buf, size_t count,
|
||||||
|
int flags, bdev_callback_t callback, bdev_param_t param);
|
||||||
|
extern bdev_id_t bdev_gather_asyn(dev_t dev, u64_t pos, iovec_t *vec,
|
||||||
|
int count, int flags, bdev_callback_t callback, bdev_param_t param);
|
||||||
|
extern bdev_id_t bdev_scatter_asyn(dev_t dev, u64_t pos, iovec_t *vec,
|
||||||
|
int count, int flags, bdev_callback_t callback, bdev_param_t param);
|
||||||
|
extern bdev_id_t bdev_ioctl_asyn(dev_t dev, int request, void *buf,
|
||||||
|
bdev_callback_t callback, bdev_param_t param);
|
||||||
|
|
||||||
|
extern int bdev_wait_asyn(bdev_id_t id);
|
||||||
|
|
||||||
|
extern void bdev_reply_asyn(message *m);
|
||||||
|
|
||||||
|
#endif /* _MINIX_BDEV_H */
|
||||||
|
|
|
@ -3,6 +3,6 @@
|
||||||
|
|
||||||
LIB= bdev
|
LIB= bdev
|
||||||
|
|
||||||
SRCS= bdev.c ipc.c driver.c
|
SRCS= bdev.c driver.c call.c ipc.c minor.c
|
||||||
|
|
||||||
.include <bsd.lib.mk>
|
.include <bsd.lib.mk>
|
||||||
|
|
52
lib/libbdev/NOTES
Normal file
52
lib/libbdev/NOTES
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
Development notes regarding libbdev, by David van Moolenbroek.
|
||||||
|
|
||||||
|
|
||||||
|
GENERAL MODEL
|
||||||
|
|
||||||
|
This library is designed mainly for use by file servers. It essentially covers
|
||||||
|
two use cases: 1) use of the block device that contains the file system itself,
|
||||||
|
and 2) use of any block device for raw block I/O (on unmounted file systems)
|
||||||
|
performed by the root file server. In the first case, the file server is
|
||||||
|
responsible for opening and closing the block device, and recovery from a
|
||||||
|
driver restart involves reopening those minor devices. Regular file systems
|
||||||
|
should have one or at most two (for a separate journal) block devices open at
|
||||||
|
the same time, which is why NR_OPEN_DEVS is set to a value that is quite low.
|
||||||
|
In the second case, VFS is responsible for opening and closing the block device
|
||||||
|
(and performing IOCTLs), as well as reopening the block device on a driver
|
||||||
|
restart -- the root file server only gets raw I/O (and flush) requests.
|
||||||
|
|
||||||
|
At this time, libbdev considers only clean crashes (a crash-only model), and
|
||||||
|
does not support recovery from behavioral errors. Protocol errors are passed to
|
||||||
|
the user process, and generally do not have an effect on the overall state of
|
||||||
|
the library.
|
||||||
|
|
||||||
|
|
||||||
|
RETRY MODEL
|
||||||
|
|
||||||
|
The philosophy for recovering from driver restarts in libbdev can be formulated
|
||||||
|
as follows: we want to tolerate an unlimited number of driver restarts over a
|
||||||
|
long time, but we do not want to keep retrying individual requests across
|
||||||
|
driver restarts. As such, we do not keep track of driver restarts on a per-
|
||||||
|
driver basis, because that would mean we put a hard limit on the number of
|
||||||
|
restarts for that driver in total. Instead, there are two limits: a driver
|
||||||
|
restart limit that is kept on a per-request basis, failing only that request
|
||||||
|
when the limit is reached, and a driver restart limit that is kept during
|
||||||
|
recovery, limiting the number of restarts and eventually giving up on the
|
||||||
|
entire driver when even the recovery keeps failing (as no progress is made in
|
||||||
|
that case).
|
||||||
|
|
||||||
|
Each transfer request also has a transfer retry count. The assumption here is
|
||||||
|
that when a transfer request returns EIO, it can be retried and possibly
|
||||||
|
succeed upon repetition. The driver restart and transfer retry counts are
|
||||||
|
tracked independently and thus the first to hit the limit will fail the
|
||||||
|
request. The behavior should be the same for synchronous and asynchronous
|
||||||
|
requests in this respect.
|
||||||
|
|
||||||
|
It could happen that a new driver gets loaded after we have decided that the
|
||||||
|
current driver is unusable. This could be due to a race condition (VFS sends a
|
||||||
|
new-driver request after we've given up) or due to user interaction (the user
|
||||||
|
loads a replacement driver). The latter case may occur legitimately with raw
|
||||||
|
I/O on the root file server, so we must not mark the driver as unusable
|
||||||
|
forever. On the other hand, in the former case, we must not continue to send
|
||||||
|
I/O without first reopening the minor devices. For this reason, we do not clean
|
||||||
|
up the record of the minor devices when we mark a driver as unusable.
|
|
@ -1,12 +1,12 @@
|
||||||
/* libbdev - block device interfacing library, by D.C. van Moolenbroek */
|
/* libbdev - block device interfacing library, by D.C. van Moolenbroek */
|
||||||
|
|
||||||
/* This is a preliminary, bare-essentials-only version of this library. */
|
|
||||||
|
|
||||||
#include <minix/drivers.h>
|
#include <minix/drivers.h>
|
||||||
#include <minix/bdev.h>
|
#include <minix/bdev.h>
|
||||||
#include <minix/ioctl.h>
|
#include <minix/ioctl.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
#include "type.h"
|
||||||
#include "proto.h"
|
#include "proto.h"
|
||||||
|
|
||||||
void bdev_driver(dev_t dev, char *label)
|
void bdev_driver(dev_t dev, char *label)
|
||||||
|
@ -26,18 +26,55 @@ void bdev_driver(dev_t dev, char *label)
|
||||||
bdev_update(dev, label);
|
bdev_update(dev, label);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bdev_retry(int *driver_tries, int *transfer_tries, int *result)
|
||||||
|
{
|
||||||
|
/* Return TRUE iff the call result implies that we should retry the operation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
switch (*result) {
|
||||||
|
case ERESTART:
|
||||||
|
/* We get this error internally if the driver has restarted and the
|
||||||
|
* current operation may now go through. Check the retry count for
|
||||||
|
* driver restarts first, as we don't want to keep trying forever.
|
||||||
|
*/
|
||||||
|
if (++*driver_tries < DRIVER_TRIES)
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
*result = EDEADSRCDST;
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case EIO:
|
||||||
|
/* The 'transfer_tries' pointer is non-NULL if this was a transfer
|
||||||
|
* request. If we get back an I/O failure, keep retrying the request
|
||||||
|
* until we hit the transfer retry limit.
|
||||||
|
*/
|
||||||
|
if (transfer_tries != NULL && ++*transfer_tries < TRANSFER_TRIES)
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
static int bdev_opcl(int req, dev_t dev, int access)
|
static int bdev_opcl(int req, dev_t dev, int access)
|
||||||
{
|
{
|
||||||
/* Open or close the given minor device.
|
/* Open or close the given minor device.
|
||||||
*/
|
*/
|
||||||
message m;
|
message m;
|
||||||
|
int r, driver_tries = 0;
|
||||||
|
|
||||||
memset(&m, 0, sizeof(m));
|
do {
|
||||||
m.m_type = req;
|
memset(&m, 0, sizeof(m));
|
||||||
m.BDEV_MINOR = minor(dev);
|
m.m_type = req;
|
||||||
m.BDEV_ACCESS = access;
|
m.BDEV_MINOR = minor(dev);
|
||||||
|
m.BDEV_ACCESS = access;
|
||||||
|
|
||||||
return bdev_sendrec(dev, &m);
|
r = bdev_sendrec(dev, &m);
|
||||||
|
} while (bdev_retry(&driver_tries, NULL, &r));
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bdev_open(dev_t dev, int access)
|
int bdev_open(dev_t dev, int access)
|
||||||
|
@ -45,8 +82,14 @@ int bdev_open(dev_t dev, int access)
|
||||||
/* Open the given minor device.
|
/* Open the given minor device.
|
||||||
* File system usage note: typically called from mount, after bdev_driver.
|
* File system usage note: typically called from mount, after bdev_driver.
|
||||||
*/
|
*/
|
||||||
|
int r;
|
||||||
|
|
||||||
return bdev_opcl(BDEV_OPEN, dev, access);
|
r = bdev_opcl(BDEV_OPEN, dev, access);
|
||||||
|
|
||||||
|
if (r == OK)
|
||||||
|
bdev_minor_add(dev, access);
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bdev_close(dev_t dev)
|
int bdev_close(dev_t dev)
|
||||||
|
@ -54,8 +97,16 @@ int bdev_close(dev_t dev)
|
||||||
/* Close the given minor device.
|
/* Close the given minor device.
|
||||||
* File system usage note: typically called from unmount.
|
* File system usage note: typically called from unmount.
|
||||||
*/
|
*/
|
||||||
|
int r;
|
||||||
|
|
||||||
return bdev_opcl(BDEV_CLOSE, dev, 0);
|
bdev_flush_asyn(dev);
|
||||||
|
|
||||||
|
r = bdev_opcl(BDEV_CLOSE, dev, 0);
|
||||||
|
|
||||||
|
if (r == OK)
|
||||||
|
bdev_minor_del(dev);
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bdev_rdwt_setup(int req, dev_t dev, u64_t pos, char *buf,
|
static int bdev_rdwt_setup(int req, dev_t dev, u64_t pos, char *buf,
|
||||||
|
@ -93,7 +144,7 @@ static int bdev_rdwt_setup(int req, dev_t dev, u64_t pos, char *buf,
|
||||||
return OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bdev_rdwt_cleanup(message *m)
|
static void bdev_rdwt_cleanup(const message *m)
|
||||||
{
|
{
|
||||||
/* Clean up a single-buffer read/write request.
|
/* Clean up a single-buffer read/write request.
|
||||||
*/
|
*/
|
||||||
|
@ -104,17 +155,19 @@ static void bdev_rdwt_cleanup(message *m)
|
||||||
static ssize_t bdev_rdwt(int req, dev_t dev, u64_t pos, char *buf,
|
static ssize_t bdev_rdwt(int req, dev_t dev, u64_t pos, char *buf,
|
||||||
size_t count, int flags)
|
size_t count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a read or write call using a single buffer.
|
/* Perform a synchronous read or write call using a single buffer.
|
||||||
*/
|
*/
|
||||||
message m;
|
message m;
|
||||||
int r;
|
int r, driver_tries = 0, transfer_tries = 0;
|
||||||
|
|
||||||
if ((r = bdev_rdwt_setup(req, dev, pos, buf, count, flags, &m)) != OK)
|
do {
|
||||||
return r;
|
if ((r = bdev_rdwt_setup(req, dev, pos, buf, count, flags, &m)) != OK)
|
||||||
|
break;
|
||||||
|
|
||||||
r = bdev_sendrec(dev, &m);
|
r = bdev_sendrec(dev, &m);
|
||||||
|
|
||||||
bdev_rdwt_cleanup(&m);
|
bdev_rdwt_cleanup(&m);
|
||||||
|
} while (bdev_retry(&driver_tries, &transfer_tries, &r));
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -182,7 +235,7 @@ static int bdev_vrdwt_setup(int req, dev_t dev, u64_t pos, iovec_t *vec,
|
||||||
return OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bdev_vrdwt_cleanup(message *m, iovec_s_t *gvec)
|
static void bdev_vrdwt_cleanup(const message *m, iovec_s_t *gvec)
|
||||||
{
|
{
|
||||||
/* Clean up a vectored read/write request.
|
/* Clean up a vectored read/write request.
|
||||||
*/
|
*/
|
||||||
|
@ -200,25 +253,28 @@ static void bdev_vrdwt_cleanup(message *m, iovec_s_t *gvec)
|
||||||
static ssize_t bdev_vrdwt(int req, dev_t dev, u64_t pos, iovec_t *vec,
|
static ssize_t bdev_vrdwt(int req, dev_t dev, u64_t pos, iovec_t *vec,
|
||||||
int count, int flags)
|
int count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a read or write call using a vector of buffers.
|
/* Perform a synchronous read or write call using a vector of buffers.
|
||||||
*/
|
*/
|
||||||
iovec_s_t gvec[NR_IOREQS];
|
iovec_s_t gvec[NR_IOREQS];
|
||||||
message m;
|
message m;
|
||||||
int r;
|
int r, driver_tries = 0, transfer_tries = 0;
|
||||||
|
|
||||||
if ((r = bdev_vrdwt_setup(req, dev, pos, vec, count, flags, &m, gvec)) != OK)
|
do {
|
||||||
return r;
|
if ((r = bdev_vrdwt_setup(req, dev, pos, vec, count, flags, &m,
|
||||||
|
gvec)) != OK)
|
||||||
|
break;
|
||||||
|
|
||||||
r = bdev_sendrec(dev, &m);
|
r = bdev_sendrec(dev, &m);
|
||||||
|
|
||||||
bdev_vrdwt_cleanup(&m, gvec);
|
bdev_vrdwt_cleanup(&m, gvec);
|
||||||
|
} while (bdev_retry(&driver_tries, &transfer_tries, &r));
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
ssize_t bdev_read(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
ssize_t bdev_read(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a read call into a single buffer.
|
/* Perform a synchronous read call into a single buffer.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return bdev_rdwt(BDEV_READ, dev, pos, buf, count, flags);
|
return bdev_rdwt(BDEV_READ, dev, pos, buf, count, flags);
|
||||||
|
@ -226,7 +282,7 @@ ssize_t bdev_read(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
||||||
|
|
||||||
ssize_t bdev_write(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
ssize_t bdev_write(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a write call from a single buffer.
|
/* Perform a synchronous write call from a single buffer.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return bdev_rdwt(BDEV_WRITE, dev, pos, buf, count, flags);
|
return bdev_rdwt(BDEV_WRITE, dev, pos, buf, count, flags);
|
||||||
|
@ -234,7 +290,7 @@ ssize_t bdev_write(dev_t dev, u64_t pos, char *buf, size_t count, int flags)
|
||||||
|
|
||||||
ssize_t bdev_gather(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
|
ssize_t bdev_gather(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a read call into a vector of buffers.
|
/* Perform a synchronous read call into a vector of buffers.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return bdev_vrdwt(BDEV_GATHER, dev, pos, vec, count, flags);
|
return bdev_vrdwt(BDEV_GATHER, dev, pos, vec, count, flags);
|
||||||
|
@ -242,7 +298,7 @@ ssize_t bdev_gather(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
|
||||||
|
|
||||||
ssize_t bdev_scatter(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
|
ssize_t bdev_scatter(dev_t dev, u64_t pos, iovec_t *vec, int count, int flags)
|
||||||
{
|
{
|
||||||
/* Perform a write call from a vector of buffers.
|
/* Perform a synchronous write call from a vector of buffers.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return bdev_vrdwt(BDEV_SCATTER, dev, pos, vec, count, flags);
|
return bdev_vrdwt(BDEV_SCATTER, dev, pos, vec, count, flags);
|
||||||
|
@ -286,7 +342,7 @@ static int bdev_ioctl_setup(dev_t dev, int request, void *buf, message *m)
|
||||||
return OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bdev_ioctl_cleanup(message *m)
|
static void bdev_ioctl_cleanup(const message *m)
|
||||||
{
|
{
|
||||||
/* Clean up an I/O control request.
|
/* Clean up an I/O control request.
|
||||||
*/
|
*/
|
||||||
|
@ -296,17 +352,287 @@ static void bdev_ioctl_cleanup(message *m)
|
||||||
|
|
||||||
int bdev_ioctl(dev_t dev, int request, void *buf)
|
int bdev_ioctl(dev_t dev, int request, void *buf)
|
||||||
{
|
{
|
||||||
/* Perform an I/O control request.
|
/* Perform a synchronous I/O control request.
|
||||||
*/
|
*/
|
||||||
message m;
|
message m;
|
||||||
int r;
|
int r, driver_tries = 0;
|
||||||
|
|
||||||
if ((r = bdev_ioctl_setup(dev, request, buf, &m)) != OK)
|
do {
|
||||||
return r;
|
if ((r = bdev_ioctl_setup(dev, request, buf, &m)) != OK)
|
||||||
|
break;
|
||||||
|
|
||||||
r = bdev_sendrec(dev, &m);
|
r = bdev_sendrec(dev, &m);
|
||||||
|
|
||||||
bdev_ioctl_cleanup(&m);
|
bdev_ioctl_cleanup(&m);
|
||||||
|
} while (bdev_retry(&driver_tries, NULL, &r));
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bdev_flush_asyn(dev_t dev)
|
||||||
|
{
|
||||||
|
/* Flush all ongoing asynchronous requests to the given minor device. This
|
||||||
|
* involves blocking until all I/O for it has completed.
|
||||||
|
* File system usage note: typically called from flush.
|
||||||
|
*/
|
||||||
|
bdev_call_t *call;
|
||||||
|
|
||||||
|
while ((call = bdev_call_find(dev)) != NULL)
|
||||||
|
(void) bdev_wait_asyn(call->id);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bdev_id_t bdev_rdwt_asyn(int req, dev_t dev, u64_t pos, char *buf,
	size_t count, int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Start an asynchronous single-buffer read or write. On success, return the
 * ID of the new call; otherwise return ENOMEM if no call structure could be
 * allocated, or the error code from setting up or sending the request.
 */
  bdev_call_t *rq;
  int r;

  rq = bdev_call_alloc(1);

  if (rq == NULL)
	return ENOMEM;

  r = bdev_rdwt_setup(req, dev, pos, buf, count, flags, &rq->msg);

  if (r != OK)
	goto fail;

  r = bdev_senda(dev, &rq->msg, rq->id);

  if (r != OK) {
	/* The request was set up but never sent; undo the setup. */
	bdev_rdwt_cleanup(&rq->msg);

	goto fail;
  }

  /* Remember the original buffer and size; bdev_restart_asyn() uses
   * vec[0].iov_addr to set the request up again after a driver restart.
   */
  rq->vec[0].iov_addr = (vir_bytes) buf;
  rq->vec[0].iov_size = count;

  rq->driver_tries = 0;
  rq->transfer_tries = 0;

  rq->dev = dev;
  rq->callback = callback;
  rq->param = param;

  return rq->id;

fail:
  bdev_call_free(rq);

  return r;
}
|
||||||
|
|
||||||
|
static bdev_id_t bdev_vrdwt_asyn(int req, dev_t dev, u64_t pos, iovec_t *vec,
	int count, int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Start an asynchronous vectored read or write. On success, return the ID of
 * the new call; otherwise return ENOMEM if no call structure could be
 * allocated, or the error code from setting up or sending the request.
 */
  bdev_call_t *rq;
  int r;

  rq = bdev_call_alloc(count);

  if (rq == NULL)
	return ENOMEM;

  r = bdev_vrdwt_setup(req, dev, pos, vec, count, flags, &rq->msg, rq->gvec);

  if (r != OK)
	goto fail;

  r = bdev_senda(dev, &rq->msg, rq->id);

  if (r != OK) {
	/* The request was set up but never sent; undo the setup. */
	bdev_vrdwt_cleanup(&rq->msg, rq->gvec);

	goto fail;
  }

  /* Keep a private copy of the caller's vector; bdev_restart_asyn() passes
   * it to bdev_vrdwt_setup() again after a driver restart.
   */
  memcpy(rq->vec, vec, sizeof(vec[0]) * count);

  rq->driver_tries = 0;
  rq->transfer_tries = 0;

  rq->dev = dev;
  rq->callback = callback;
  rq->param = param;

  return rq->id;

fail:
  bdev_call_free(rq);

  return r;
}
|
||||||
|
|
||||||
|
bdev_id_t bdev_read_asyn(dev_t dev, u64_t pos, char *buf, size_t count,
	int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Asynchronously read into a single buffer. The return value is whatever
 * bdev_rdwt_asyn() yields for a BDEV_READ request.
 */
  bdev_id_t id;

  id = bdev_rdwt_asyn(BDEV_READ, dev, pos, buf, count, flags, callback,
	param);

  return id;
}
|
||||||
|
|
||||||
|
bdev_id_t bdev_write_asyn(dev_t dev, u64_t pos, char *buf, size_t count,
	int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Asynchronously write from a single buffer. The return value is whatever
 * bdev_rdwt_asyn() yields for a BDEV_WRITE request.
 */
  bdev_id_t id;

  id = bdev_rdwt_asyn(BDEV_WRITE, dev, pos, buf, count, flags, callback,
	param);

  return id;
}
|
||||||
|
|
||||||
|
bdev_id_t bdev_gather_asyn(dev_t dev, u64_t pos, iovec_t *vec, int count,
	int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Asynchronously read into a vector of buffers. The return value is whatever
 * bdev_vrdwt_asyn() yields for a BDEV_GATHER request.
 */
  bdev_id_t id;

  id = bdev_vrdwt_asyn(BDEV_GATHER, dev, pos, vec, count, flags, callback,
	param);

  return id;
}
|
||||||
|
|
||||||
|
bdev_id_t bdev_scatter_asyn(dev_t dev, u64_t pos, iovec_t *vec, int count,
	int flags, bdev_callback_t callback, bdev_param_t param)
{
/* Perform an asynchronous write call from a vector of buffers. The return
 * value is whatever bdev_vrdwt_asyn() yields for a BDEV_SCATTER request.
 */

  return bdev_vrdwt_asyn(BDEV_SCATTER, dev, pos, vec, count, flags, callback,
	param);
}
|
||||||
|
|
||||||
|
bdev_id_t bdev_ioctl_asyn(dev_t dev, int request, void *buf,
|
||||||
|
bdev_callback_t callback, bdev_param_t param)
|
||||||
|
{
|
||||||
|
/* Perform an asynchronous I/O control request.
|
||||||
|
*/
|
||||||
|
bdev_call_t *call;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if ((call = bdev_call_alloc(1)) == NULL)
|
||||||
|
return ENOMEM;
|
||||||
|
|
||||||
|
if ((r = bdev_ioctl_setup(dev, request, buf, &call->msg)) != OK) {
|
||||||
|
bdev_call_free(call);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((r = bdev_senda(dev, &call->msg, call->id)) != OK) {
|
||||||
|
bdev_ioctl_cleanup(&call->msg);
|
||||||
|
|
||||||
|
bdev_call_free(call);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
call->dev = dev;
|
||||||
|
call->callback = callback;
|
||||||
|
call->param = param;
|
||||||
|
call->driver_tries = 0;
|
||||||
|
call->vec[0].iov_addr = (vir_bytes) buf;
|
||||||
|
|
||||||
|
return call->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bdev_callback_asyn(bdev_call_t *call, int result)
{
/* Complete an asynchronous request with the given result: clean up the
 * request, invoke the user callback, and free the call structure. A transfer
 * request that failed with EIO may instead be sent again, in which case
 * nothing is completed yet.
 */
  int type;

  type = call->msg.m_type;

  /* Only transfer requests are eligible for a retry on EIO. */
  if (type == BDEV_READ || type == BDEV_WRITE || type == BDEV_GATHER ||
	type == BDEV_SCATTER) {
	if (result == EIO) {
		call->transfer_tries++;

		if (call->transfer_tries < TRANSFER_TRIES) {
			result = bdev_senda(call->dev, &call->msg, call->id);

			/* Resent successfully; completion comes later. */
			if (result == OK)
				return;
		}
	}
  }

  /* Undo the request setup, according to the request type. */
  type = call->msg.m_type;

  if (type == BDEV_READ || type == BDEV_WRITE)
	bdev_rdwt_cleanup(&call->msg);
  else if (type == BDEV_GATHER || type == BDEV_SCATTER)
	bdev_vrdwt_cleanup(&call->msg, call->gvec);
  else if (type == BDEV_IOCTL)
	bdev_ioctl_cleanup(&call->msg);
  else
	assert(0);

  /* Report the result to the user. */
  /* FIXME: we assume all reasonable ssize_t values can be stored in an int. */
  call->callback(call->dev, call->id, call->param, result);

  /* The call structure is no longer needed. */
  bdev_call_free(call);
}
|
||||||
|
|
||||||
|
int bdev_restart_asyn(bdev_call_t *call)
{
/* The driver serving the given call has restarted and may have a new
 * endpoint. Tear down and set up the request again, then resend it. Return
 * EDEADSRCDST if the call has hit its driver restart limit, the setup error
 * if setting up again failed, or otherwise the result of resending.
 */
  u64_t pos;
  int type, r = OK;

  /* Count this restart against the per-request limit. */
  call->driver_tries++;

  if (call->driver_tries >= DRIVER_TRIES)
	return EDEADSRCDST;

  /* Recreate all grants for the new endpoint. */
  type = call->msg.m_type;

  switch (type) {
  case BDEV_READ:
  case BDEV_WRITE:
	bdev_rdwt_cleanup(&call->msg);

	pos = make64(call->msg.BDEV_POS_LO, call->msg.BDEV_POS_HI);

	r = bdev_rdwt_setup(type, call->dev, pos,
		(char *) call->vec[0].iov_addr, call->msg.BDEV_COUNT,
		call->msg.BDEV_FLAGS, &call->msg);

	break;

  case BDEV_GATHER:
  case BDEV_SCATTER:
	bdev_vrdwt_cleanup(&call->msg, call->gvec);

	pos = make64(call->msg.BDEV_POS_LO, call->msg.BDEV_POS_HI);

	r = bdev_vrdwt_setup(type, call->dev, pos, call->vec,
		call->msg.BDEV_COUNT, call->msg.BDEV_FLAGS, &call->msg,
		call->gvec);

	break;

  case BDEV_IOCTL:
	bdev_ioctl_cleanup(&call->msg);

	r = bdev_ioctl_setup(call->dev, call->msg.BDEV_REQUEST,
		(char *) call->vec[0].iov_addr, &call->msg);

	break;

  default:
	assert(0);
  }

  /* Resend only if the request could be set up again. */
  return (r != OK) ? r : bdev_senda(call->dev, &call->msg, call->id);
}
|
||||||
|
|
118
lib/libbdev/call.c
Normal file
118
lib/libbdev/call.c
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
/* libbdev - asynchronous call structure management */
|
||||||
|
|
||||||
|
#include <minix/drivers.h>
|
||||||
|
#include <minix/bdev.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
#include "type.h"
|
||||||
|
#include "proto.h"
|
||||||
|
|
||||||
|
static bdev_call_t *calls[NR_CALLS];
|
||||||
|
|
||||||
|
bdev_call_t *bdev_call_alloc(int count)
|
||||||
|
{
|
||||||
|
/* Allocate a call structure.
|
||||||
|
*/
|
||||||
|
bdev_call_t *call;
|
||||||
|
bdev_id_t id;
|
||||||
|
|
||||||
|
for (id = 0; id < NR_CALLS; id++)
|
||||||
|
if (calls[id] == NULL)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (id == NR_CALLS)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
call = malloc(sizeof(bdev_call_t) +
|
||||||
|
sizeof(call->gvec[0]) * (count - 1) +
|
||||||
|
sizeof(call->vec[0]) * count);
|
||||||
|
|
||||||
|
if (call == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
call->id = id;
|
||||||
|
call->vec = (iovec_t *) &call->gvec[count];
|
||||||
|
|
||||||
|
calls[id] = call;
|
||||||
|
|
||||||
|
return call;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bdev_call_free(bdev_call_t *call)
{
/* Unregister the given call structure from the call table and release its
 * memory. The structure must currently be registered under its own ID.
 */
  bdev_id_t slot;

  slot = call->id;

  assert(calls[slot] == call);

  calls[slot] = NULL;

  free(call);
}
|
||||||
|
|
||||||
|
bdev_call_t *bdev_call_get(bdev_id_t id)
{
/* Look up a call structure by its request ID. Return NULL if the ID is out of
 * range or no call is registered under it.
 */

  return (id >= 0 && id < NR_CALLS) ? calls[id] : NULL;
}
|
||||||
|
|
||||||
|
bdev_call_t *bdev_call_find(dev_t dev)
|
||||||
|
{
|
||||||
|
/* Find the first asynchronous request for the given device, if any.
|
||||||
|
*/
|
||||||
|
bdev_id_t id;
|
||||||
|
|
||||||
|
for (id = 0; id < NR_CALLS; id++)
|
||||||
|
if (calls[id] != NULL && calls[id]->dev == dev)
|
||||||
|
return calls[id];
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
bdev_call_t *bdev_call_iter_maj(dev_t dev, bdev_call_t *call,
	bdev_call_t **next)
{
/* Step through all asynchronous requests whose device has the same major
 * number as 'dev'. Pass NULL as 'call' to start; on later invocations pass
 * the previously returned call and the same 'next' pointer. The successor is
 * determined before returning, so the caller may free the returned call.
 * Return NULL when there are no more matching calls.
 */
  bdev_id_t id;
  int maj;

  maj = major(dev);

  if (call != NULL) {
	/* Continue with the successor computed during the last invocation,
	 * which may be NULL.
	 */
	call = *next;
  } else {
	/* First invocation: search the whole table for the first match. */
	for (id = 0; id < NR_CALLS; id++) {
		if (calls[id] != NULL && major(calls[id]->dev) == maj) {
			call = calls[id];

			break;
		}
	}
  }

  if (call == NULL)
	return NULL;

  /* Determine the successor now, while 'call' is still valid. */
  *next = NULL;

  for (id = call->id + 1; id < NR_CALLS; id++) {
	if (calls[id] == NULL)
		continue;

	if (major(calls[id]->dev) == maj) {
		*next = calls[id];

		break;
	}
  }

  return call;
}
|
|
@ -1,7 +1,17 @@
|
||||||
#ifndef _BDEV_CONST_H
|
#ifndef _BDEV_CONST_H
|
||||||
#define _BDEV_CONST_H
|
#define _BDEV_CONST_H
|
||||||
|
|
||||||
|
#define NR_CALLS 256 /* maximum number of concurrent async calls */
|
||||||
|
|
||||||
|
#define NO_ID (-1) /* ID for synchronous requests */
|
||||||
|
|
||||||
#define DS_NR_TRIES 100 /* number of times to check endpoint in DS */
|
#define DS_NR_TRIES 100 /* number of times to check endpoint in DS */
|
||||||
#define DS_DELAY 50000 /* delay time (us) between DS checks */
|
#define DS_DELAY 50000 /* delay time (us) between DS checks */
|
||||||
|
|
||||||
|
#define DRIVER_TRIES 10 /* after so many tries, give up on a driver */
|
||||||
|
#define RECOVER_TRIES 2 /* tolerated nr of restarts during recovery */
|
||||||
|
#define TRANSFER_TRIES 5 /* number of times to try transfers on EIO */
|
||||||
|
|
||||||
|
#define NR_OPEN_DEVS 4 /* maximum different opened minor devices */
|
||||||
|
|
||||||
#endif /* _BDEV_CONST_H */
|
#endif /* _BDEV_CONST_H */
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#include "const.h"
|
#include "const.h"
|
||||||
|
#include "type.h"
|
||||||
#include "proto.h"
|
#include "proto.h"
|
||||||
|
|
||||||
static struct {
|
static struct {
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
#include <minix/bdev.h>
|
#include <minix/bdev.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
#include "type.h"
|
||||||
#include "proto.h"
|
#include "proto.h"
|
||||||
|
|
||||||
static void bdev_cancel(dev_t dev)
|
static void bdev_cancel(dev_t dev)
|
||||||
|
@ -11,14 +13,83 @@ static void bdev_cancel(dev_t dev)
|
||||||
/* Recovering the driver for the given device has failed repeatedly. Mark it as
|
/* Recovering the driver for the given device has failed repeatedly. Mark it as
|
||||||
* permanently unusable, and clean up any associated calls and resources.
|
* permanently unusable, and clean up any associated calls and resources.
|
||||||
*/
|
*/
|
||||||
|
bdev_call_t *call, *next;
|
||||||
|
|
||||||
printf("bdev: driver for major %d (endpoint %d) crashed\n",
|
printf("bdev: giving up on major %d\n", major(dev));
|
||||||
major(dev), bdev_driver_get(dev));
|
|
||||||
|
/* Cancel all pending asynchronous requests. */
|
||||||
|
call = NULL;
|
||||||
|
|
||||||
|
while ((call = bdev_call_iter_maj(dev, call, &next)) != NULL)
|
||||||
|
bdev_callback_asyn(call, EDEADSRCDST);
|
||||||
|
|
||||||
/* Mark the driver as unusable. */
|
/* Mark the driver as unusable. */
|
||||||
bdev_driver_clear(dev);
|
bdev_driver_clear(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bdev_recover(dev_t dev, int update_endpt)
{
/* Attempt to recover from an IPC failure towards the driver that serves the
 * given device. One attempt consists of refreshing the driver endpoint (only
 * if 'update_endpt' is set), reopening the opened minor devices on the driver,
 * and resubmitting all pending asynchronous calls for the major device. At
 * most RECOVER_TRIES attempts are made. Return TRUE if an attempt succeeded.
 * Otherwise, give up on the driver through bdev_cancel() and return FALSE.
 */
  bdev_call_t *cur, *next;
  endpoint_t driver;
  int status, attempt;

  printf("bdev: recovering from a driver crash on major %d\n", major(dev));

  for (attempt = 0; attempt < RECOVER_TRIES; attempt++) {
	/* Refresh the endpoint first, if this was asked for. */
	if (update_endpt)
		(void) bdev_driver_update(dev);

	driver = bdev_driver_get(dev);
	if (driver == NONE)
		break;

	/* Any attempt after this one must start with a fresh endpoint. */
	update_endpt = TRUE;

	/* Reopen all minor devices on the new driver. */
	status = bdev_minor_reopen(dev);
	if (status != OK) {
		/* Only a driver that died again is worth another attempt. Any
		 * other error leaves us with a driver that we can neither use
		 * as is, nor force to restart.
		 */
		if (status != EDEADSRCDST)
			break;

		continue;
	}

	/* Resubmit the pending asynchronous calls. A call that cannot be
	 * reissued is failed on an individual basis, without affecting the
	 * overall recovery. Note that we will never get new IPC failures
	 * here.
	 */
	for (cur = bdev_call_iter_maj(dev, NULL, &next); cur != NULL;
	    cur = bdev_call_iter_maj(dev, cur, &next)) {
		status = bdev_restart_asyn(cur);

		if (status != OK)
			bdev_callback_asyn(cur, status);
	}

	/* Recovery seems successful. The current synchronous request (if
	 * any) can now be reissued, and normal operation can continue.
	 */
	printf("bdev: recovery successful, new driver is at %d\n", driver);

	return TRUE;
  }

  /* Recovery failed repeatedly. Give up on this driver. */
  bdev_cancel(dev);

  return FALSE;
}
|
||||||
|
|
||||||
void bdev_update(dev_t dev, char *label)
|
void bdev_update(dev_t dev, char *label)
|
||||||
{
|
{
|
||||||
/* Set the endpoint for a driver. Perform recovery if necessary.
|
/* Set the endpoint for a driver. Perform recovery if necessary.
|
||||||
|
@ -32,13 +103,40 @@ void bdev_update(dev_t dev, char *label)
|
||||||
/* If updating the driver causes an endpoint change, we need to perform
|
/* If updating the driver causes an endpoint change, we need to perform
|
||||||
* recovery, but not update the endpoint yet again.
|
* recovery, but not update the endpoint yet again.
|
||||||
*/
|
*/
|
||||||
|
if (old_endpt != NONE && old_endpt != endpt)
|
||||||
|
bdev_recover(dev, FALSE /*update_endpt*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t id)
{
/* Send a copy of the given request message, tagged with the given call ID, to
 * the driver of the given device using asynsend(). Return EDEADSRCDST if there
 * is no usable driver endpoint, or the result of asynsend() otherwise. This
 * function will never get any new IPC errors sending to the driver; if sending
 * an asynchronous request fails, we will find out through other ways later.
 */
  endpoint_t driver;
  message request;
  int status;

  /* Without a usable driver endpoint, fail instantly. */
  driver = bdev_driver_get(dev);
  if (driver == NONE)
	return EDEADSRCDST;

  /* Work on a private copy, so that the caller's message stays untouched. */
  request = *m_orig;
  request.BDEV_ID = id;

  status = asynsend(driver, &request);

  if (status != OK)
	printf("bdev: asynsend to driver (%d) failed (%d)\n", driver, status);

  return status;
}
|
||||||
|
|
||||||
int bdev_sendrec(dev_t dev, const message *m_orig)
|
int bdev_sendrec(dev_t dev, const message *m_orig)
|
||||||
{
|
{
|
||||||
/* Send a request to the given device, and wait for the reply.
|
/* Send a synchronous request for the given device, and wait for the reply.
|
||||||
|
* Return ERESTART if the caller should try to reissue the request.
|
||||||
*/
|
*/
|
||||||
static long id = 0;
|
|
||||||
endpoint_t endpt;
|
endpoint_t endpt;
|
||||||
message m;
|
message m;
|
||||||
int r;
|
int r;
|
||||||
|
@ -49,15 +147,19 @@ int bdev_sendrec(dev_t dev, const message *m_orig)
|
||||||
|
|
||||||
/* Send the request and block until we receive a reply. */
|
/* Send the request and block until we receive a reply. */
|
||||||
m = *m_orig;
|
m = *m_orig;
|
||||||
m.BDEV_ID = ++id;
|
m.BDEV_ID = NO_ID;
|
||||||
|
|
||||||
r = sendrec(endpt, &m);
|
r = sendrec(endpt, &m);
|
||||||
|
|
||||||
/* This version of libbdev does not support recovery. Forget the driver. */
|
/* If communication failed, the driver has died. We assume it will be
|
||||||
|
* restarted soon after, so we attempt recovery. Upon success, we let the
|
||||||
|
* caller reissue the synchronous request.
|
||||||
|
*/
|
||||||
if (r == EDEADSRCDST) {
|
if (r == EDEADSRCDST) {
|
||||||
bdev_cancel(dev);
|
if (!bdev_recover(dev, TRUE /*update_endpt*/))
|
||||||
|
return EDEADSRCDST;
|
||||||
|
|
||||||
return EDEADSRCDST;
|
return ERESTART;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r != OK) {
|
if (r != OK) {
|
||||||
|
@ -68,22 +170,167 @@ int bdev_sendrec(dev_t dev, const message *m_orig)
|
||||||
if (m.m_type != BDEV_REPLY) {
|
if (m.m_type != BDEV_REPLY) {
|
||||||
printf("bdev: driver (%d) sent weird response (%d)\n",
|
printf("bdev: driver (%d) sent weird response (%d)\n",
|
||||||
endpt, m.m_type);
|
endpt, m.m_type);
|
||||||
return EIO;
|
return EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ERESTART signifies a driver restart. Again, we do not support this yet. */
|
/* The protocol contract states that no asynchronous reply can satisfy a
|
||||||
|
* synchronous SENDREC call, so we can never get an asynchronous reply here.
|
||||||
|
*/
|
||||||
|
if (m.BDEV_ID != NO_ID) {
|
||||||
|
printf("bdev: driver (%d) sent invalid ID (%ld)\n", endpt, m.BDEV_ID);
|
||||||
|
return EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unless the caller is misusing libbdev, we will only get ERESTART if we
|
||||||
|
* have managed to resend a raw block I/O request to the driver after a
|
||||||
|
* restart, but before VFS has had a chance to reopen the associated device
|
||||||
|
* first. This is highly exceptional, and hard to deal with correctly. We
|
||||||
|
* take the easiest route: sleep for a while so that VFS can reopen the
|
||||||
|
* device, and then resend the request. If the call keeps failing, the caller
|
||||||
|
* will eventually give up.
|
||||||
|
*/
|
||||||
if (m.BDEV_STATUS == ERESTART) {
|
if (m.BDEV_STATUS == ERESTART) {
|
||||||
bdev_cancel(dev);
|
printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
|
||||||
|
endpt);
|
||||||
|
|
||||||
return EDEADSRCDST;
|
micro_delay(1000);
|
||||||
}
|
|
||||||
|
|
||||||
if (m.BDEV_ID != id) {
|
return ERESTART;
|
||||||
printf("bdev: driver (%d) sent invalid response (%ld)\n",
|
|
||||||
endpt, m.BDEV_ID);
|
|
||||||
return EIO;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the result of our request. */
|
/* Return the result of our request. */
|
||||||
return m.BDEV_STATUS;
|
return m.BDEV_STATUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bdev_receive(dev_t dev, message *m)
{
/* Receive one valid message from the driver of the given device into 'm'.
 * Return OK if a BDEV_REPLY message was received; the caller is responsible
 * for checking its ID and status. Return EDEADSRCDST if there is no usable
 * driver endpoint or recovery failed, EINVAL if the message is not a
 * BDEV_REPLY, any other error code returned by sef_receive(), or ERESTART if
 * the driver died DRIVER_TRIES times while no message was received.
 */
  endpoint_t driver;
  int status, deaths = 0;

  for (;;) {
	/* Retrieve and check the driver endpoint on every try, as it will
	 * change with each driver restart.
	 */
	driver = bdev_driver_get(dev);
	if (driver == NONE)
		return EDEADSRCDST;

	status = sef_receive(driver, m);

	if (status != EDEADSRCDST) {
		if (status != OK) {
			printf("bdev: IPC to driver (%d) failed (%d)\n",
				driver, status);

			return status;
		}

		if (m->m_type != BDEV_REPLY) {
			printf("bdev: driver (%d) sent weird response (%d)\n",
				driver, m->m_type);

			return EINVAL;
		}

		/* The caller is responsible for checking the ID and status. */
		return OK;
	}

	/* The driver died. Once the maximum number of tries is reached, stop
	 * trying.
	 */
	deaths++;
	if (deaths == DRIVER_TRIES)
		break;

	/* Attempt recovery. If successful, all asynchronous requests will
	 * have been resent, and we can retry receiving a reply.
	 */
	if (!bdev_recover(dev, TRUE /*update_endpt*/))
		return EDEADSRCDST;
  }

  /* All tries failed, even though all recovery attempts succeeded. In this
   * case, we let the caller recheck whether it wants to keep calling us,
   * returning ERESTART to indicate we can be called again but did not actually
   * receive a message.
   */
  return ERESTART;
}
|
||||||
|
|
||||||
|
void bdev_reply_asyn(message *m)
{
/* Process a reply message that has come in from a disk driver. The message
 * must be of type BDEV_REPLY. The reply is dropped if its ID does not match a
 * known call, or if it was not sent by the current driver of the call's
 * device. An ERESTART status causes the call to be resubmitted; any other
 * status is passed to bdev_callback_asyn() as the result of the call.
 */
  bdev_call_t *call;
  endpoint_t driver;
  int status;

  /* This is a requirement for the caller. */
  assert(m->m_type == BDEV_REPLY);

  /* Get the corresponding asynchronous call structure. */
  call = bdev_call_get((bdev_id_t) m->BDEV_ID);
  if (call == NULL) {
	printf("bdev: driver (%d) replied to unknown request (%ld)\n",
		m->m_source, m->BDEV_ID);
	return;
  }

  /* Make sure the reply was sent from the right endpoint. */
  driver = bdev_driver_get(call->dev);

  if (m->m_source != driver) {
	/* If the endpoint is NONE, this may be a stray reply. */
	if (driver != NONE)
		printf("bdev: driver (%d) replied to request not sent to it\n",
			m->m_source);
	return;
  }

  /* The common case: hand the result of the request to the callback. */
  if (m->BDEV_STATUS != ERESTART) {
	bdev_callback_asyn(call, m->BDEV_STATUS);

	return;
  }

  /* The driver reported ERESTART. As the message below says, sleep for a while
   * to allow for a reopen of the device, and then resend the request. If
   * resending fails, the call is failed with the resulting error code.
   */
  printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
	driver);

  micro_delay(1000);

  status = bdev_restart_asyn(call);
  if (status != OK)
	bdev_callback_asyn(call, status);
}
|
||||||
|
|
||||||
|
int bdev_wait_asyn(bdev_id_t id)
{
/* Wait for the asynchronous request with the given ID to complete. Return
 * ENOENT if no such request exists, an error code if receiving a message from
 * the driver failed with anything other than ERESTART, or OK once the request
 * is no longer known.
 */
  bdev_call_t *call;
  dev_t dev;
  message reply;
  int status;

  call = bdev_call_get(id);
  if (call == NULL)
	return ENOENT;

  /* Remember the device; the call structure may be gone after a reply. */
  dev = call->dev;

  for (;;) {
	status = bdev_receive(dev, &reply);

	/* Processing the reply will free up the call structure as a side
	 * effect. If we repeatedly get ERESTART, we will repeatedly resend the
	 * asynchronous request, which will then eventually hit the retry limit
	 * and we will break out of the loop.
	 */
	if (status == OK)
		bdev_reply_asyn(&reply);
	else if (status != ERESTART)
		return status;

	if (bdev_call_get(id) == NULL)
		break;
  }

  return OK;
}
|
||||||
|
|
120
lib/libbdev/minor.c
Normal file
120
lib/libbdev/minor.c
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
/* libbdev - tracking and reopening of opened minor devices */
|
||||||
|
|
||||||
|
#include <minix/drivers.h>
|
||||||
|
#include <minix/bdev.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
#include "type.h"
|
||||||
|
#include "proto.h"
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
dev_t dev;
|
||||||
|
int count;
|
||||||
|
int access;
|
||||||
|
} open_dev[NR_OPEN_DEVS] = { { NO_DEV, 0, 0 } };
|
||||||
|
|
||||||
|
int bdev_minor_reopen(dev_t dev)
{
/* Reopen all minor devices on the major device of 'dev', by sending BDEV_OPEN
 * requests directly to the current driver endpoint. This function duplicates
 * some code from elsewhere, because in this case we must avoid performing
 * recovery. Return OK if all requests succeeded, or the first error otherwise:
 * the sendrec() result, EINVAL for a malformed reply, or the status reported
 * by the driver.
 * FIXME: if reopening fails with a non-IPC error, we should attempt to close
 * all minors that we did manage to reopen so far, or they might stay open
 * forever.
 */
  endpoint_t driver;
  message m;
  int slot, nr, status, dev_major;

  dev_major = major(dev);
  driver = bdev_driver_get(dev);

  assert(driver != NONE);

  for (slot = 0; slot < NR_OPEN_DEVS; slot++) {
	/* Skip entries that belong to other major devices. */
	if (major(open_dev[slot].dev) != dev_major)
		continue;

	/* Each minor device may have been opened multiple times. Send an open
	 * request for each time that it was opened before. We could reopen it
	 * just once, but then we'd have to keep a shadow open count as well.
	 */
	for (nr = 0; nr < open_dev[slot].count; nr++) {
		memset(&m, 0, sizeof(m));
		m.m_type = BDEV_OPEN;
		m.BDEV_MINOR = minor(open_dev[slot].dev);
		m.BDEV_ACCESS = open_dev[slot].access;
		m.BDEV_ID = NO_ID;

		status = sendrec(driver, &m);
		if (status != OK) {
			printf("bdev: IPC to driver (%d) failed (%d)\n",
				driver, status);
			return status;
		}

		if (m.m_type != BDEV_REPLY) {
			printf("bdev: driver (%d) sent weird response (%d)\n",
				driver, m.m_type);
			return EINVAL;
		}

		/* The reply must carry the NO_ID that was sent. */
		if (m.BDEV_ID != NO_ID) {
			printf("bdev: driver (%d) sent invalid ID (%ld)\n",
				driver, m.BDEV_ID);
			return EINVAL;
		}

		status = m.BDEV_STATUS;
		if (status != OK) {
			printf("bdev: driver (%d) failed device reopen (%d)\n",
				driver, status);
			return status;
		}
	}
  }

  return OK;
}
|
||||||
|
|
||||||
|
void bdev_minor_add(dev_t dev, int access)
{
/* Increase the reference count of the given minor device. If the device is
 * already in the table, the given access bits are merged into its entry.
 * Otherwise the first unused entry is claimed for it. If the table is full,
 * a warning is printed and the device is not tracked.
 */
  int slot, unused;

  unused = -1;

  for (slot = 0; slot < NR_OPEN_DEVS; slot++) {
	if (open_dev[slot].dev == dev) {
		/* Already tracked: count one more open. */
		open_dev[slot].count += 1;
		open_dev[slot].access |= access;

		return;
	}

	/* Remember the first unused entry in case we need a new one. */
	if (unused < 0 && open_dev[slot].dev == NO_DEV)
		unused = slot;
  }

  if (unused < 0) {
	printf("bdev: too many open devices, increase NR_OPEN_DEVS\n");
	return;
  }

  open_dev[unused].dev = dev;
  open_dev[unused].count = 1;
  open_dev[unused].access = access;
}
|
||||||
|
|
||||||
|
void bdev_minor_del(dev_t dev)
{
/* Decrease the reference count of the given minor device, if present. When
 * the count drops to zero, the entry is marked as unused again.
 */
  int slot;

  /* Locate the first entry for this device, if there is one. */
  slot = 0;
  while (slot < NR_OPEN_DEVS && open_dev[slot].dev != dev)
	slot++;

  if (slot == NR_OPEN_DEVS)
	return;

  open_dev[slot].count -= 1;

  if (open_dev[slot].count == 0)
	open_dev[slot].dev = NO_DEV;
}
|
|
@ -1,6 +1,10 @@
|
||||||
#ifndef _BDEV_PROTO_H
|
#ifndef _BDEV_PROTO_H
|
||||||
#define _BDEV_PROTO_H
|
#define _BDEV_PROTO_H
|
||||||
|
|
||||||
|
/* bdev.c */
|
||||||
|
extern void bdev_callback_asyn(bdev_call_t *call, int result);
|
||||||
|
extern int bdev_restart_asyn(bdev_call_t *call);
|
||||||
|
|
||||||
/* driver.c */
|
/* driver.c */
|
||||||
extern void bdev_driver_init(void);
|
extern void bdev_driver_init(void);
|
||||||
extern void bdev_driver_clear(dev_t dev);
|
extern void bdev_driver_clear(dev_t dev);
|
||||||
|
@ -8,8 +12,22 @@ extern endpoint_t bdev_driver_set(dev_t dev, char *label);
|
||||||
extern endpoint_t bdev_driver_get(dev_t dev);
|
extern endpoint_t bdev_driver_get(dev_t dev);
|
||||||
extern endpoint_t bdev_driver_update(dev_t dev);
|
extern endpoint_t bdev_driver_update(dev_t dev);
|
||||||
|
|
||||||
|
/* call.c */
|
||||||
|
extern bdev_call_t *bdev_call_alloc(int count);
|
||||||
|
extern void bdev_call_free(bdev_call_t *call);
|
||||||
|
extern bdev_call_t *bdev_call_get(bdev_id_t id);
|
||||||
|
extern bdev_call_t *bdev_call_find(dev_t dev);
|
||||||
|
extern bdev_call_t *bdev_call_iter_maj(dev_t dev, bdev_call_t *last,
|
||||||
|
bdev_call_t **next);
|
||||||
|
|
||||||
/* ipc.c */
|
/* ipc.c */
|
||||||
extern void bdev_update(dev_t dev, char *label);
|
extern void bdev_update(dev_t dev, char *label);
|
||||||
|
extern int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t num);
|
||||||
extern int bdev_sendrec(dev_t dev, const message *m_orig);
|
extern int bdev_sendrec(dev_t dev, const message *m_orig);
|
||||||
|
|
||||||
|
/* minor.c */
|
||||||
|
extern int bdev_minor_reopen(dev_t dev);
|
||||||
|
extern void bdev_minor_add(dev_t dev, int access);
|
||||||
|
extern void bdev_minor_del(dev_t dev);
|
||||||
|
|
||||||
#endif /* _BDEV_PROTO_H */
|
#endif /* _BDEV_PROTO_H */
|
||||||
|
|
16
lib/libbdev/type.h
Normal file
16
lib/libbdev/type.h
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
#ifndef _BDEV_TYPE_H
|
||||||
|
#define _BDEV_TYPE_H
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
bdev_id_t id; /* call ID */
|
||||||
|
dev_t dev; /* target device number */
|
||||||
|
message msg; /* request message */
|
||||||
|
bdev_callback_t callback; /* callback function */
|
||||||
|
bdev_param_t param; /* callback parameter */
|
||||||
|
int driver_tries; /* times retried on driver restarts */
|
||||||
|
int transfer_tries; /* times retried on transfer errors */
|
||||||
|
iovec_t *vec; /* original vector */
|
||||||
|
iovec_s_t gvec[1]; /* grant vector */
|
||||||
|
} bdev_call_t;
|
||||||
|
|
||||||
|
#endif /* _BDEV_TYPE_H */
|
Loading…
Reference in a new issue