0522c9d0c1
Change-Id: I48ca5a4f77cc9e924e8aee2fe6c32e0da9b42560
1051 lines
27 KiB
C
1051 lines
27 KiB
C
/* Filter driver - lowest layer - disk driver management */
|
|
|
|
#include "inc.h"
|
|
|
|
/* Drivers. */
|
|
static struct driverinfo driver[2];
|
|
|
|
/* State variables. */
|
|
static asynmsg_t amsgtable[2];
|
|
|
|
static int size_known = 0;
|
|
static u64_t disk_size;
|
|
|
|
static int problem_stats[BD_LAST] = { 0 };
|
|
|
|
/*===========================================================================*
|
|
* driver_open *
|
|
*===========================================================================*/
|
|
static int driver_open(int which)
|
|
{
|
|
/* Perform an open or close operation on the driver. This is
|
|
* unfinished code: we should never be doing a blocking ipc_sendrec()
|
|
* to the driver.
|
|
*/
|
|
message msg;
|
|
cp_grant_id_t gid;
|
|
struct part_geom part;
|
|
sector_t sectors;
|
|
int r;
|
|
|
|
memset(&msg, 0, sizeof(msg));
|
|
msg.m_type = BDEV_OPEN;
|
|
msg.BDEV_MINOR = driver[which].minor;
|
|
msg.BDEV_ACCESS = BDEV_R_BIT | BDEV_W_BIT;
|
|
msg.BDEV_ID = 0;
|
|
r = ipc_sendrec(driver[which].endpt, &msg);
|
|
|
|
if (r != OK) {
|
|
/* Should we restart the driver now? */
|
|
printf("Filter: driver_open: ipc_sendrec returned %d\n", r);
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
if(msg.m_type != BDEV_REPLY ||
|
|
msg.m_lblockdriver_lbdev_reply.status != OK) {
|
|
printf("Filter: driver_open: ipc_sendrec returned %d, %d\n",
|
|
msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
/* Take the opportunity to retrieve the hard disk size. */
|
|
gid = cpf_grant_direct(driver[which].endpt,
|
|
(vir_bytes) &part, sizeof(part), CPF_WRITE);
|
|
if(!GRANT_VALID(gid))
|
|
panic("invalid grant: %d", gid);
|
|
|
|
memset(&msg, 0, sizeof(msg));
|
|
msg.m_type = BDEV_IOCTL;
|
|
msg.BDEV_MINOR = driver[which].minor;
|
|
msg.BDEV_REQUEST = DIOCGETP;
|
|
msg.BDEV_GRANT = gid;
|
|
msg.BDEV_USER = NONE;
|
|
msg.BDEV_ID = 0;
|
|
|
|
r = ipc_sendrec(driver[which].endpt, &msg);
|
|
|
|
cpf_revoke(gid);
|
|
|
|
if (r != OK || msg.m_type != BDEV_REPLY ||
|
|
msg.m_lblockdriver_lbdev_reply.status != OK) {
|
|
/* Not sure what to do here, either. */
|
|
printf("Filter: ioctl(DIOCGETP) returned (%d, %d)\n",
|
|
r, msg.m_type);
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
if(!size_known) {
|
|
disk_size = part.size;
|
|
size_known = 1;
|
|
sectors = (unsigned long)(disk_size / SECTOR_SIZE);
|
|
if ((u64_t)sectors * SECTOR_SIZE != disk_size) {
|
|
printf("Filter: partition too large\n");
|
|
|
|
return RET_REDO;
|
|
}
|
|
#if DEBUG
|
|
printf("Filter: partition size: 0x%"PRIx64" / %lu sectors\n",
|
|
disk_size, sectors);
|
|
#endif
|
|
} else {
|
|
if (disk_size != part.size) {
|
|
printf("Filter: partition size mismatch "
|
|
"(0x%"PRIx64" != 0x%"PRIx64")\n",
|
|
part.size, disk_size);
|
|
|
|
return RET_REDO;
|
|
}
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* driver_close *
|
|
*===========================================================================*/
|
|
static int driver_close(int which)
|
|
{
|
|
message msg;
|
|
int r;
|
|
|
|
memset(&msg, 0, sizeof(msg));
|
|
msg.m_type = BDEV_CLOSE;
|
|
msg.BDEV_MINOR = driver[which].minor;
|
|
msg.BDEV_ID = 0;
|
|
r = ipc_sendrec(driver[which].endpt, &msg);
|
|
|
|
if (r != OK) {
|
|
/* Should we restart the driver now? */
|
|
printf("Filter: driver_close: ipc_sendrec returned %d\n", r);
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
if(msg.m_type != BDEV_REPLY ||
|
|
msg.m_lblockdriver_lbdev_reply.status != OK) {
|
|
printf("Filter: driver_close: ipc_sendrec returned %d, %d\n",
|
|
msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* driver_init *
|
|
*===========================================================================*/
|
|
void driver_init(void)
|
|
{
|
|
/* Initialize the driver layer. */
|
|
int r;
|
|
|
|
memset(driver, 0, sizeof(driver));
|
|
|
|
/* Endpoints unknown. */
|
|
driver[DRIVER_MAIN].endpt = NONE;
|
|
driver[DRIVER_BACKUP].endpt = NONE;
|
|
|
|
/* Get disk driver's and this proc's endpoint. */
|
|
driver[DRIVER_MAIN].label = MAIN_LABEL;
|
|
driver[DRIVER_MAIN].minor = MAIN_MINOR;
|
|
|
|
/* No up received yet but expected when the driver starts. */
|
|
driver[DRIVER_MAIN].up_event = UP_EXPECTED;
|
|
driver[DRIVER_BACKUP].up_event = UP_EXPECTED;
|
|
|
|
r = ds_retrieve_label_endpt(driver[DRIVER_MAIN].label,
|
|
&driver[DRIVER_MAIN].endpt);
|
|
if (r != OK) {
|
|
printf("Filter: failed to get main disk driver's endpoint: "
|
|
"%d\n", r);
|
|
bad_driver(DRIVER_MAIN, BD_DEAD, EFAULT);
|
|
check_driver(DRIVER_MAIN);
|
|
}
|
|
else if (driver_open(DRIVER_MAIN) != OK) {
|
|
panic("unhandled driver_open failure");
|
|
}
|
|
|
|
if(USE_MIRROR) {
|
|
driver[DRIVER_BACKUP].label = BACKUP_LABEL;
|
|
driver[DRIVER_BACKUP].minor = BACKUP_MINOR;
|
|
|
|
if(!strcmp(driver[DRIVER_MAIN].label,
|
|
driver[DRIVER_BACKUP].label)) {
|
|
panic("same driver: not tested");
|
|
}
|
|
|
|
r = ds_retrieve_label_endpt(driver[DRIVER_BACKUP].label,
|
|
&driver[DRIVER_BACKUP].endpt);
|
|
if (r != OK) {
|
|
printf("Filter: failed to get backup disk driver's "
|
|
"endpoint: %d\n", r);
|
|
bad_driver(DRIVER_BACKUP, BD_DEAD, EFAULT);
|
|
check_driver(DRIVER_BACKUP);
|
|
}
|
|
else if (driver_open(DRIVER_BACKUP) != OK) {
|
|
panic("unhandled driver_open failure");
|
|
}
|
|
}
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* driver_shutdown *
|
|
*===========================================================================*/
|
|
void driver_shutdown(void)
|
|
{
|
|
/* Clean up. */
|
|
|
|
#if DEBUG
|
|
printf("Filter: %u driver deaths, %u protocol errors, "
|
|
"%u data errors\n", problem_stats[BD_DEAD],
|
|
problem_stats[BD_PROTO], problem_stats[BD_DATA]);
|
|
#endif
|
|
|
|
if(driver_close(DRIVER_MAIN) != OK)
|
|
printf("Filter: BDEV_CLOSE failed on shutdown (1)\n");
|
|
|
|
if(USE_MIRROR)
|
|
if(driver_close(DRIVER_BACKUP) != OK)
|
|
printf("Filter: BDEV_CLOSE failed on shutdown (2)\n");
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* get_raw_size *
|
|
*===========================================================================*/
|
|
u64_t get_raw_size(void)
|
|
{
|
|
/* Return the size of the raw disks as used by the filter driver.
|
|
*/
|
|
|
|
return disk_size;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* reset_kills *
|
|
*===========================================================================*/
|
|
void reset_kills(void)
|
|
{
|
|
/* Reset kill and retry statistics. */
|
|
driver[DRIVER_MAIN].kills = 0;
|
|
driver[DRIVER_MAIN].retries = 0;
|
|
driver[DRIVER_BACKUP].kills = 0;
|
|
driver[DRIVER_BACKUP].retries = 0;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* bad_driver *
|
|
*===========================================================================*/
|
|
int bad_driver(int which, int type, int error)
|
|
{
|
|
/* A disk driver has died or produced an error. Mark it so that we can
|
|
* deal with it later, and return RET_REDO to indicate that the
|
|
* current operation is to be retried. Also store an error code to
|
|
* return to the user if the situation is unrecoverable.
|
|
*/
|
|
driver[which].problem = type;
|
|
driver[which].error = error;
|
|
|
|
return RET_REDO;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* new_driver_ep *
|
|
*===========================================================================*/
|
|
static int new_driver_ep(int which)
|
|
{
|
|
/* See if a new driver instance has already been started for the given
|
|
* driver, by retrieving its entry from DS.
|
|
*/
|
|
int r;
|
|
endpoint_t endpt;
|
|
|
|
r = ds_retrieve_label_endpt(driver[which].label, &endpt);
|
|
|
|
if (r != OK) {
|
|
printf("Filter: DS query for %s failed\n",
|
|
driver[which].label);
|
|
|
|
return 0;
|
|
}
|
|
|
|
if (endpt == driver[which].endpt) {
|
|
#if DEBUG
|
|
printf("Filter: same endpoint for %s\n", driver[which].label);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
#if DEBUG
|
|
printf("Filter: new enpdoint for %s: %d -> %d\n", driver[which].label,
|
|
driver[which].endpt, endpt);
|
|
#endif
|
|
|
|
driver[which].endpt = endpt;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* check_problem *
|
|
*===========================================================================*/
|
|
static int check_problem(int which, int problem, int retries, int *tell_rs)
|
|
{
|
|
/* A problem has occurred with a driver. Update statistics, and decide
|
|
* what to do. If EAGAIN is returned, the driver should be restarted;
|
|
* any other result will be passed up.
|
|
*/
|
|
|
|
#if DEBUG
|
|
printf("Filter: check_problem processing driver %d, problem %d\n",
|
|
which, problem);
|
|
#endif
|
|
|
|
problem_stats[problem]++;
|
|
|
|
if(new_driver_ep(which)) {
|
|
#if DEBUG
|
|
printf("Filter: check_problem: noticed a new driver\n");
|
|
#endif
|
|
|
|
if(driver_open(which) == OK) {
|
|
#if DEBUG2
|
|
printf("Filter: open OK -> no recovery\n");
|
|
#endif
|
|
return OK;
|
|
} else {
|
|
#if DEBUG2
|
|
printf("Filter: open not OK -> recovery\n");
|
|
#endif
|
|
problem = BD_PROTO;
|
|
problem_stats[problem]++;
|
|
}
|
|
}
|
|
|
|
/* If the driver has died, we always need to restart it. If it has
|
|
* been giving problems, we first retry the request, up to N times,
|
|
* after which we kill and restart the driver. We restart the driver
|
|
* up to M times, after which we remove the driver from the mirror
|
|
* configuration. If we are not set up to do mirroring, we can only
|
|
* do one thing, and that is continue to limp along with the bad
|
|
* driver..
|
|
*/
|
|
switch(problem) {
|
|
case BD_PROTO:
|
|
case BD_DATA:
|
|
driver[which].retries++;
|
|
|
|
#if DEBUG
|
|
printf("Filter: disk driver %d has had "
|
|
"%d/%d retry attempts, %d/%d kills\n", which,
|
|
driver[which].retries, NR_RETRIES,
|
|
driver[which].kills, NR_RESTARTS);
|
|
#endif
|
|
|
|
if (driver[which].retries < NR_RETRIES) {
|
|
if(retries == 1) {
|
|
#if DEBUG
|
|
printf("Filter: not restarting; retrying "
|
|
"(retries %d/%d, kills %d/%d)\n",
|
|
driver[which].retries, NR_RETRIES,
|
|
driver[which].kills, NR_RESTARTS);
|
|
#endif
|
|
return OK;
|
|
}
|
|
#if DEBUG
|
|
printf("Filter: restarting (retries %d/%d, "
|
|
"kills %d/%d, internal retry %d)\n",
|
|
driver[which].retries, NR_RETRIES,
|
|
driver[which].kills, NR_RESTARTS, retries);
|
|
#endif
|
|
}
|
|
|
|
#if DEBUG
|
|
printf("Filter: disk driver %d has reached error "
|
|
"threshold, restarting driver\n", which);
|
|
#endif
|
|
|
|
*tell_rs = (driver[which].up_event != UP_PENDING);
|
|
break;
|
|
|
|
case BD_DEAD:
|
|
/* Can't kill that which is already dead.. */
|
|
*tell_rs = 0;
|
|
break;
|
|
|
|
default:
|
|
panic("invalid problem: %d", problem);
|
|
}
|
|
|
|
/* At this point, the driver will be restarted. */
|
|
driver[which].retries = 0;
|
|
driver[which].kills++;
|
|
|
|
if (driver[which].kills < NR_RESTARTS)
|
|
return EAGAIN;
|
|
|
|
/* We've reached the maximum number of restarts for this driver. */
|
|
if (USE_MIRROR) {
|
|
printf("Filter: kill threshold reached, disabling mirroring\n");
|
|
|
|
USE_MIRROR = 0;
|
|
|
|
if (which == DRIVER_MAIN) {
|
|
driver[DRIVER_MAIN] = driver[DRIVER_BACKUP];
|
|
|
|
/* This is not necessary. */
|
|
strlcpy(MAIN_LABEL, BACKUP_LABEL, sizeof(MAIN_LABEL));
|
|
MAIN_MINOR = BACKUP_MINOR;
|
|
}
|
|
|
|
driver[DRIVER_BACKUP].endpt = NONE;
|
|
|
|
return OK;
|
|
}
|
|
else {
|
|
/* We tried, we really did. But now we give up. Tell the user.
|
|
*/
|
|
printf("Filter: kill threshold reached, returning error\n");
|
|
|
|
if (driver[which].error == EAGAIN) return EIO;
|
|
|
|
return driver[which].error;
|
|
}
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* restart_driver *
|
|
*===========================================================================*/
|
|
static void restart_driver(int which, int tell_rs)
|
|
{
|
|
/* Restart the given driver. Block until the new instance is up.
|
|
*/
|
|
message msg;
|
|
int ipc_status;
|
|
int r;
|
|
|
|
if (tell_rs) {
|
|
/* Tell RS to refresh or restart the driver */
|
|
msg.m_type = RS_REFRESH;
|
|
msg.m_rs_req.addr = driver[which].label;
|
|
msg.m_rs_req.len = strlen(driver[which].label);
|
|
|
|
#if DEBUG
|
|
printf("Filter: asking RS to refresh %s..\n",
|
|
driver[which].label);
|
|
#endif
|
|
|
|
r = ipc_sendrec(RS_PROC_NR, &msg);
|
|
|
|
if (r != OK || msg.m_type != OK)
|
|
panic("RS request failed: %d", r);
|
|
|
|
#if DEBUG
|
|
printf("Filter: RS call succeeded\n");
|
|
#endif
|
|
}
|
|
|
|
/* Wait until the new driver instance is up, and get its endpoint. */
|
|
#if DEBUG
|
|
printf("Filter: endpoint update driver %d; old endpoint %d\n",
|
|
which, driver[which].endpt);
|
|
#endif
|
|
|
|
if(driver[which].up_event == UP_EXPECTED) {
|
|
driver[which].up_event = UP_NONE;
|
|
}
|
|
while(driver[which].up_event != UP_PENDING) {
|
|
r = driver_receive(DS_PROC_NR, &msg, &ipc_status);
|
|
if(r != OK)
|
|
panic("driver_receive returned error: %d", r);
|
|
|
|
ds_event();
|
|
}
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* check_driver *
|
|
*===========================================================================*/
|
|
int check_driver(int which)
|
|
{
|
|
/* See if the given driver has been troublesome, and if so, deal with
|
|
* it.
|
|
*/
|
|
int problem, tell_rs;
|
|
int r, retries = 0;
|
|
|
|
problem = driver[which].problem;
|
|
|
|
if (problem == BD_NONE)
|
|
return OK;
|
|
|
|
do {
|
|
if(retries) {
|
|
#if DEBUG
|
|
printf("Filter: check_driver: retry number %d\n",
|
|
retries);
|
|
#endif
|
|
problem = BD_PROTO;
|
|
}
|
|
retries++;
|
|
driver[which].problem = BD_NONE;
|
|
|
|
/* Decide what to do: continue operation, restart the driver,
|
|
* or return an error.
|
|
*/
|
|
r = check_problem(which, problem, retries, &tell_rs);
|
|
if (r != EAGAIN)
|
|
return r;
|
|
|
|
/* Restarting the driver it is. First tell RS (if necessary),
|
|
* then wait for the new driver instance to come up.
|
|
*/
|
|
restart_driver(which, tell_rs);
|
|
|
|
/* Finally, open the device on the new driver */
|
|
} while (driver_open(which) != OK);
|
|
|
|
#if DEBUG
|
|
printf("Filter: check_driver restarted driver %d, endpoint %d\n",
|
|
which, driver[which].endpt);
|
|
#endif
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* flt_senda *
|
|
*===========================================================================*/
|
|
static int flt_senda(message *mess, int which)
{
/* Queue a message for asynchronous delivery to one driver, using that
 * driver's slot in the asynchronous message table. A failing ipc_senda()
 * is fatal, so this can only return OK at the moment.
 */
	asynmsg_t *slot = &amsgtable[which];
	int res;

	/* Complete the request before copying it into the table. */
	mess->BDEV_ID = 0;
	mess->BDEV_MINOR = driver[which].minor;

	slot->msg = *mess;
	slot->dst = driver[which].endpt;
	slot->flags = AMF_VALID;

	res = ipc_senda(amsgtable, 2);
	if (res != OK)
		panic("ipc_senda returned error: %d", res);

	return res;
}
|
|
|
|
/*===========================================================================*
|
|
* check_senda *
|
|
*===========================================================================*/
|
|
static int check_senda(int which)
|
|
{
|
|
/* Check whether an earlier senda resulted in an error indicating the
|
|
* message never got delivered. Only in that case can we reliably say
|
|
* that the driver died. Return BD_DEAD in this case, and BD_PROTO
|
|
* otherwise.
|
|
*/
|
|
asynmsg_t *amp;
|
|
|
|
amp = &amsgtable[which];
|
|
|
|
if ((amp->flags & AMF_DONE) && (amp->result == EDEADSRCDST)) {
|
|
|
|
return BD_DEAD;
|
|
}
|
|
|
|
return BD_PROTO;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* flt_receive *
|
|
*===========================================================================*/
|
|
static int flt_receive(message *mess, int which)
{
/* Wait for a reply from the given driver, or from either driver when
 * 'which' is negative. DS notifications are processed along the way, and
 * stray or unexpected messages are dropped. Returns OK once a suitable
 * message is in 'mess', or RET_REDO (through bad_driver) on a timeout.
 */
	int ipc_status;
	int notify;
	int r;

	for (;;) {
		r = driver_receive(ANY, mess, &ipc_status);
		if (r != OK)
			panic("driver_receive returned error: %d", r);

		notify = is_ipc_notify(ipc_status);

		if (notify && mess->m_source == DS_PROC_NR) {
			ds_event();
			continue;
		}

		if (notify && mess->m_source == CLOCK) {
			/* Ignore notifications that predate the current
			 * alarm.
			 */
			if (mess->m_notify.timestamp <
				flt_alarm((clock_t) -1)) {
#if DEBUG
				printf("Filter: SKIPPING old alarm "
					"notification\n");
#endif
				continue;
			}

#if DEBUG
			printf("Filter: timeout waiting for disk driver %d "
				"reply!\n", which);
#endif

			/* If we're waiting for either driver, both are at
			 * fault. Otherwise, just report the one not replying.
			 */
			if (which < 0) {
				bad_driver(DRIVER_MAIN,
					check_senda(DRIVER_MAIN), EFAULT);

				return bad_driver(DRIVER_BACKUP,
					check_senda(DRIVER_BACKUP), EFAULT);
			}

			return bad_driver(which, check_senda(which), EFAULT);
		}

		/* Anything not coming from one of our drivers is dropped. */
		if (mess->m_source != driver[DRIVER_MAIN].endpt &&
			mess->m_source != driver[DRIVER_BACKUP].endpt) {
#if DEBUG
			printf("Filter: got STRAY message %d from %d\n",
				mess->m_type, mess->m_source);
#endif
			continue;
		}

		/* Done if we did not care which driver replies, or if the
		 * reply comes from the specific driver we are waiting for.
		 */
		if (which < 0 || mess->m_source == driver[which].endpt)
			return OK;

		/* This should probably be treated as a real protocol error.
		 * We do not abort any receives (not even paired receives)
		 * except because of timeouts. Getting here means a driver
		 * replied at least the timeout period later than expected,
		 * which should be enough reason to kill it really. The other
		 * explanation is that it is actually violating the protocol
		 * and sending bogus messages...
		 */
#if DEBUG
		printf("Filter: got UNEXPECTED reply from %d\n",
			mess->m_source);
#endif
	}
}
|
|
|
|
/*===========================================================================*
|
|
* flt_sendrec *
|
|
*===========================================================================*/
|
|
static int flt_sendrec(message *mess, int which)
{
/* Send a request to one driver and wait for its reply, guarded by an
 * alarm. Returns the result of the send if that fails, RET_REDO (through
 * bad_driver) if the send shows that the driver is dead, and otherwise
 * the result of flt_receive().
 */
	int res;

	res = flt_senda(mess, which);
	if (res != OK)
		return res;

	if (check_senda(which) == BD_DEAD)
		return bad_driver(which, BD_DEAD, EFAULT);

	/* Arm the alarm for the duration of the receive only. */
	flt_alarm(DRIVER_TIMEOUT);
	res = flt_receive(mess, which);
	flt_alarm(0);

	return res;
}
|
|
|
|
/*===========================================================================*
|
|
* do_sendrec_both *
|
|
*===========================================================================*/
|
|
static int do_sendrec_both(message *m1, message *m2)
{
/* Send m1 to the main driver and m2 to the backup driver, and collect
 * both replies: on success, m1 holds the main driver's reply and m2 the
 * backup driver's. This function will only return either OK or RET_REDO.
 */
	message first, second;
	int other = -1;
	int res;

	/* If the two disks use the same driver, do two sequential
	 * flt_sendrec() calls. Such a setup is not very useful though.
	 */
	if (strcmp(driver[DRIVER_MAIN].label,
		driver[DRIVER_BACKUP].label) == 0) {
		res = flt_sendrec(m1, DRIVER_MAIN);
		if (res != OK)
			return res;

		return flt_sendrec(m2, DRIVER_BACKUP);
	}

	/* With different drivers, send both requests first, then receive
	 * twice, telling the replies apart by means of m_source.
	 */
	res = flt_senda(m1, DRIVER_MAIN);
	if (res != OK)
		return res;

	res = flt_senda(m2, DRIVER_BACKUP);
	if (res != OK)
		return res;

	/* Set alarm. */
	flt_alarm(DRIVER_TIMEOUT);

	/* The first reply may come from either driver. */
	res = flt_receive(&first, -1);
	if (res != OK) {
		flt_alarm(0);
		return res;
	}

	/* Work out which driver still owes us a reply. */
	if (first.m_source == driver[DRIVER_MAIN].endpt)
		other = DRIVER_BACKUP;
	else if (first.m_source == driver[DRIVER_BACKUP].endpt)
		other = DRIVER_MAIN;
	else
		panic("message from unexpected source: %d",
			first.m_source);

	res = flt_receive(&second, other);

	/* Clear the alarm. */
	flt_alarm(0);

	if (res != OK)
		return res;

	/* Hand the replies back in main/backup order. */
	if (first.m_source == driver[DRIVER_MAIN].endpt) {
		*m1 = first;
		*m2 = second;
	} else {
		*m1 = second;
		*m2 = first;
	}

	return OK;
}
|
|
|
|
/*===========================================================================*
|
|
* do_sendrec_one *
|
|
*===========================================================================*/
|
|
static int do_sendrec_one(message *m1)
{
/* Exchange a request and reply with the main driver only. Problems are
 * dealt with elsewhere. This function will only return either OK or
 * RET_REDO.
 */
	int res;

	res = flt_sendrec(m1, DRIVER_MAIN);

	return res;
}
|
|
|
|
/*===========================================================================*
|
|
* paired_sendrec *
|
|
*===========================================================================*/
|
|
static int paired_sendrec(message *m1, message *m2, int both)
{
/* Perform a request/reply exchange with the main driver only, or with both
 * drivers if 'both' is set, and return the result of that exchange.
 */
	int res;

#if DEBUG2
	printf("paired_sendrec(%d) - <%d,%llx,%d> - %x,%x\n",
		both, m1->m_type, m1->BDEV_POS,
		m1->BDEV_COUNT, m1->BDEV_GRANT, m2->BDEV_GRANT);
#endif

	res = both ? do_sendrec_both(m1, m2) : do_sendrec_one(m1);

#if DEBUG2
	if (res != OK)
		printf("paired_sendrec about to return %d\n", res);
#endif

	return res;
}
|
|
|
|
/*===========================================================================*
|
|
* single_grant *
|
|
*===========================================================================*/
|
|
static int single_grant(endpoint_t endpt, vir_bytes buf, int access,
|
|
cp_grant_id_t *gid, iovec_s_t vector[NR_IOREQS], size_t size)
|
|
{
|
|
/* Create grants for a vectored request to a single driver.
|
|
*/
|
|
cp_grant_id_t grant;
|
|
size_t chunk;
|
|
int count;
|
|
|
|
/* Split up the request into chunks, if requested. This makes no
|
|
* difference at all, except that this works around a weird performance
|
|
* bug with large DMA PRDs on some machines.
|
|
*/
|
|
if (CHUNK_SIZE > 0) chunk = CHUNK_SIZE;
|
|
else chunk = size;
|
|
|
|
/* Fill in the vector, creating a grant for each item. */
|
|
for (count = 0; size > 0 && count < NR_IOREQS; count++) {
|
|
/* The last chunk will contain all the remaining data. */
|
|
if (chunk > size || count == NR_IOREQS - 1)
|
|
chunk = size;
|
|
|
|
grant = cpf_grant_direct(endpt, buf, chunk, access);
|
|
if (!GRANT_VALID(grant))
|
|
panic("invalid grant: %d", grant);
|
|
|
|
vector[count].iov_grant = grant;
|
|
vector[count].iov_size = chunk;
|
|
|
|
buf += chunk;
|
|
size -= chunk;
|
|
}
|
|
|
|
/* Then create a grant for the vector itself. */
|
|
*gid = cpf_grant_direct(endpt, (vir_bytes) vector,
|
|
sizeof(vector[0]) * count, CPF_READ);
|
|
|
|
if (!GRANT_VALID(*gid))
|
|
panic("invalid grant: %d", *gid);
|
|
|
|
return count;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* paired_grant *
|
|
*===========================================================================*/
|
|
static int paired_grant(char *buf1, char *buf2, int request,
|
|
cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], size_t size,
|
|
int both)
|
|
{
|
|
/* Create memory grants, either to one or to both drivers.
|
|
*/
|
|
int count, access;
|
|
|
|
count = 0;
|
|
access = (request == FLT_WRITE) ? CPF_READ : CPF_WRITE;
|
|
|
|
if(driver[DRIVER_MAIN].endpt > 0) {
|
|
count = single_grant(driver[DRIVER_MAIN].endpt,
|
|
(vir_bytes) buf1, access, &gids[0], vectors[0], size);
|
|
}
|
|
|
|
if (both) {
|
|
if(driver[DRIVER_BACKUP].endpt > 0) {
|
|
count = single_grant(driver[DRIVER_BACKUP].endpt,
|
|
(vir_bytes) buf2, access, &gids[1],
|
|
vectors[1], size);
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* single_revoke *
|
|
*===========================================================================*/
|
|
static void single_revoke(cp_grant_id_t gid,
|
|
const iovec_s_t vector[NR_IOREQS], int count)
|
|
{
|
|
/* Revoke all grants associated with a request to a single driver.
|
|
* Modify the given size to reflect the actual I/O performed.
|
|
*/
|
|
int i;
|
|
|
|
/* Revoke the grants for all the elements of the vector. */
|
|
for (i = 0; i < count; i++)
|
|
cpf_revoke(vector[i].iov_grant);
|
|
|
|
/* Then revoke the grant for the vector itself. */
|
|
cpf_revoke(gid);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* paired_revoke *
|
|
*===========================================================================*/
|
|
static void paired_revoke(const cp_grant_id_t *gids,
|
|
iovec_s_t vectors[2][NR_IOREQS], int count, int both)
|
|
{
|
|
/* Revoke grants to drivers for a single request.
|
|
*/
|
|
|
|
single_revoke(gids[0], vectors[0], count);
|
|
|
|
if (both)
|
|
single_revoke(gids[1], vectors[1], count);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* read_write *
|
|
*===========================================================================*/
|
|
int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
{
/* Perform a read or write of *sizep bytes at position 'pos' on the main
 * driver (buffer 'bufa') and, when mirroring applies, on the backup driver
 * (buffer 'bufb'). Returns OK on success, with *sizep lowered if a driver
 * legitimately transferred less; otherwise the result of the failed
 * exchange, or RET_REDO (through bad_driver) after a bad reply.
 */
	iovec_s_t vectors[2][NR_IOREQS];
	cp_grant_id_t gids[2];
	message m1, m2;
	int both, count, r;

	gids[0] = GRANT_INVALID;
	gids[1] = GRANT_INVALID;

	/* Send two requests only if mirroring is enabled and the given
	 * request is either FLT_READ2 or FLT_WRITE.
	 */
	both = (USE_MIRROR && request != FLT_READ);

	count = paired_grant(bufa, bufb, request, gids, vectors, *sizep, both);

	/* Build one request and clone it; only the grants differ. */
	memset(&m1, 0, sizeof(m1));
	m1.m_type = (request == FLT_WRITE) ? BDEV_SCATTER : BDEV_GATHER;
	m1.BDEV_COUNT = count;
	m1.BDEV_POS = pos;

	m2 = m1;

	m1.BDEV_GRANT = gids[0];
	m2.BDEV_GRANT = gids[1];

	r = paired_sendrec(&m1, &m2, both);

	paired_revoke(gids, vectors, count, both);

	if (r != OK) {
#if DEBUG
		if (r != RET_REDO)
			printf("Filter: paired_sendrec returned %d\n", r);
#endif
		return r;
	}

	/* Validate the reply of the main driver. */
	if (m1.m_type != BDEV_REPLY ||
		m1.m_lblockdriver_lbdev_reply.status < 0) {
		printf("Filter: unexpected/invalid reply from main driver: "
			"(%x, %d)\n", m1.m_type,
			m1.m_lblockdriver_lbdev_reply.status);

		if (m1.m_type != BDEV_REPLY)
			return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);

		return bad_driver(DRIVER_MAIN, BD_PROTO,
			m1.m_lblockdriver_lbdev_reply.status);
	}

	if (m1.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from main driver\n");

		/* If the driver returned a value *larger* than we requested,
		 * OR if we did NOT exceed the disk size, then we should
		 * report the driver for acting strangely!
		 */
		if (m1.m_lblockdriver_lbdev_reply.status > (ssize_t) *sizep ||
			(pos + (unsigned int)
			m1.m_lblockdriver_lbdev_reply.status < disk_size))
			return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);

		/* Return the actual size. */
		*sizep = m1.m_lblockdriver_lbdev_reply.status;
	}

	if (!both)
		return OK;

	/* Validate the reply of the backup driver in the same way. */
	if (m2.m_type != BDEV_REPLY ||
		m2.m_lblockdriver_lbdev_reply.status < 0) {
		printf("Filter: unexpected/invalid reply from "
			"backup driver (%x, %d)\n",
			m2.m_type,
			m2.m_lblockdriver_lbdev_reply.status);

		if (m2.m_type != BDEV_REPLY)
			return bad_driver(DRIVER_BACKUP, BD_PROTO, EFAULT);

		return bad_driver(DRIVER_BACKUP, BD_PROTO,
			m2.m_lblockdriver_lbdev_reply.status);
	}

	if (m2.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from backup driver\n");

		/* As above */
		if (m2.m_lblockdriver_lbdev_reply.status > (ssize_t) *sizep ||
			(pos + (unsigned int)
			m2.m_lblockdriver_lbdev_reply.status < disk_size))
			return bad_driver(DRIVER_BACKUP, BD_PROTO, EFAULT);

		/* Return the actual size. */
		if ((ssize_t)*sizep >= m2.m_lblockdriver_lbdev_reply.status)
			*sizep = m2.m_lblockdriver_lbdev_reply.status;
	}

	return OK;
}
|
|
|
|
/*===========================================================================*
|
|
* ds_event *
|
|
*===========================================================================*/
|
|
void ds_event()
|
|
{
|
|
char key[DS_MAX_KEYLEN];
|
|
char *blkdriver_prefix = "drv.blk.";
|
|
u32_t value;
|
|
int type;
|
|
endpoint_t owner_endpoint;
|
|
int r;
|
|
int which;
|
|
|
|
/* Get the event and the owner from DS. */
|
|
r = ds_check(key, &type, &owner_endpoint);
|
|
if(r != OK) {
|
|
if(r != ENOENT)
|
|
printf("Filter: ds_event: ds_check failed: %d\n", r);
|
|
return;
|
|
}
|
|
r = ds_retrieve_u32(key, &value);
|
|
if(r != OK) {
|
|
printf("Filter: ds_event: ds_retrieve_u32 failed\n");
|
|
return;
|
|
}
|
|
|
|
/* Only check for VFS driver up events. */
|
|
if(strncmp(key, blkdriver_prefix, strlen(blkdriver_prefix))
|
|
|| value != DS_DRIVER_UP) {
|
|
return;
|
|
}
|
|
|
|
/* See if this is a driver we are responsible for. */
|
|
if(driver[DRIVER_MAIN].endpt == owner_endpoint) {
|
|
which = DRIVER_MAIN;
|
|
}
|
|
else if(driver[DRIVER_BACKUP].endpt == owner_endpoint) {
|
|
which = DRIVER_BACKUP;
|
|
}
|
|
else {
|
|
return;
|
|
}
|
|
|
|
/* Mark the driver as (re)started. */
|
|
driver[which].up_event = driver[which].up_event == UP_EXPECTED ?
|
|
UP_NONE : UP_PENDING;
|
|
}
|
|
|