Filter driver by Wu Bingzheng et al

This commit is contained in:
David van Moolenbroek 2009-12-02 10:08:58 +00:00
parent f197bcb435
commit be2087ecf9
17 changed files with 2856 additions and 2 deletions

View file

@ -23,7 +23,7 @@ case $#:$1 in
ttypa ttypb ttypc ttypd ttype ttypf \
ttyq0 ttyq1 ttyq2 ttyq3 ttyq4 ttyq5 ttyq6 ttyq7 ttyq8 ttyq9 \
ttyqa ttyqb ttyqc ttyqd ttyqe ttyqf \
eth klog random rescue
eth klog random rescue filter
;;
0:|1:-\?)
cat >&2 <<EOF
@ -48,6 +48,7 @@ Where key is one of the following:
kbd # Make /dev/kbd
kbdaux # Make /dev/kbdaux
rescue # Make /dev/rescue
filter # Make /dev/filter
video # Make /dev/video
std # All standard devices
EOF
@ -259,6 +260,11 @@ do
$e mknod klog c 15 0
$e chmod 600 klog
;;
filter)
# filter driver
$e mknod filter b 11 0
$e chmod 644 filter
;;
*)
echo "$0: don't know about $dev" >&2
ex=1

View file

@ -28,6 +28,7 @@ all install depend clean:
cd ./dpeth && $(MAKE) $@
cd ./log && $(MAKE) $@
cd ./bios_wini && $(MAKE) $@
cd ./filter && $(MAKE) $@
cd ./random && $(MAKE) $@
cd ./readclock && $(MAKE) $@
cd ./dp8390 && $(MAKE) $@

31
drivers/filter/Makefile Normal file
View file

@ -0,0 +1,31 @@
# Makefile for filter driver
# Builds the 'filter' binary from the objects listed in OBJ and installs it
# as /usr/sbin/filter.
DRIVER = filter
# programs, flags, etc.
CC = cc
# DEBUG and DEBUG2 are tested with #if in the driver sources to enable
# diagnostic printf() output; DEBUG2 guards the more verbose messages.
CFLAGS = -DDEBUG=1 -DDEBUG2=0
LDFLAGS =
LIBS = -lsys
OBJ = main.o sum.o driver.o util.o optset.o crc.o md5.o
# build local binary
all build: $(DRIVER)
$(DRIVER): $(OBJ)
$(CC) -o $@ $(LDFLAGS) $(OBJ) $(LIBS)
# install with other drivers
install: /usr/sbin/$(DRIVER)
/usr/sbin/$(DRIVER): $(DRIVER)
install -o root -c $? $@
# clean up local files
clean:
rm -f *.o *.bak $(DRIVER)
# regenerate the dependency file from all C sources
# NOTE(review): CPPFLAGS is not set in this Makefile; presumably it comes
# from the environment or is intentionally empty - confirm.
depend:
mkdep "$(CC) -E $(CPPFLAGS)" *.c > .depend
# Include generated dependencies.
include .depend

88
drivers/filter/crc.c Normal file
View file

@ -0,0 +1,88 @@
/* CRC32 implementation taken from cksum.c */
/* Copyright 1991 by Vincent Archer
* You may freely redistribute this software, in source or binary
* form, provided that you do not alter this copyright mention in any
* way.
*/
#include <sys/types.h>
/* Lookup table used by compute_crc(), 256 entries.
 *
 * Entries 1..255 are the standard CRC-32 table values for the reflected
 * polynomial 0xedb88320 (entry i is i shifted right eight times, XORing in
 * the polynomial whenever a one bit is shifted out). Two rows of five
 * entries (indices 166..170 and 171..175) had been exchanged, which broke
 * the table's linearity (crctab[a ^ b] == crctab[a] ^ crctab[b]); they are
 * in polynomial order here.
 *
 * Entry 0 is 0x7fffffff rather than the polynomial value 0. compute_crc()
 * never uses a zero index directly; it substitutes successive table
 * entries instead, so this slot is only reached through that substitution.
 * NOTE(review): the value is kept exactly as inherited from cksum.c.
 */
unsigned long crctab[] = {
	0x7fffffff,
	0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
	0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e,
	0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d,
	0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 0x646ba8c0,
	0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63,
	0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa,
	0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75,
	0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, 0xc8d75180,
	0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87,
	0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5,
	0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4,
	0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b,
	0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea,
	0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541,
	0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc,
	0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f,
	0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 0x5edef90e,
	0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c,
	0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b,
	0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 0x8708a3d2,
	0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671,
	0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8,
	0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767,
	0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, 0x36034af6,
	0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795,
	0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b,
	0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82,
	0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d,
	0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8,
	0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff,
	0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee,
	0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d,
	0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 0xbdbdf21c,
	0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02,
	0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
/* Compute the 32-bit checksum of the n bytes starting at b, using crctab.
 *
 * b: input bytes (read only); n: number of bytes, may be 0.
 * Returns the checksum in the low 32 bits of the result; 0 for n == 0.
 *
 * The table index and the running sum are masked explicitly so that the
 * result is the same whether unsigned long is 32 or 64 bits wide. Without
 * the masks, a 64-bit unsigned long keeps the bits shifted out of the
 * 32-bit sum, and (s >> 24) then yields an index beyond the 256 table
 * entries.
 */
unsigned long compute_crc( unsigned char *b, size_t n)
{
	const size_t nr_entries = sizeof(crctab) / sizeof(crctab[0]);
	unsigned long s = 0;
	size_t aux = 0;
	size_t i;

	while (n-- > 0) {
		/* Compute the index to the crc table */
		i = (size_t) (((s >> 24) ^ (unsigned long) *b++) & 0xffUL);

		if (i == 0) {
			/* Replace an intermediate zero with the next value
			 * from the sequence */
			i = aux++;
			if (aux >= nr_entries) aux = 0;
		}

		/* New checksum value, kept within 32 bits */
		s = ((s << 8) ^ crctab[i]) & 0xffffffffUL;
	}

	return(s);
}

6
drivers/filter/crc.h Normal file
View file

@ -0,0 +1,6 @@
#ifndef _CRC_H
#define _CRC_H

/* size_t is used in the prototype below; include its definition here so
 * that this header can be included on its own.
 */
#include <stddef.h>

/* Compute a checksum over the n bytes starting at b, using the CRC table
 * in crc.c. Returns the checksum value.
 */
extern unsigned long compute_crc(unsigned char *b, size_t n);

#endif /* _CRC_H */

930
drivers/filter/driver.c Normal file
View file

@ -0,0 +1,930 @@
/* Filter driver - lowest layer - disk driver management */
#include "inc.h"
/* Drivers. */
/* Per-driver bookkeeping, indexed by DRIVER_MAIN and DRIVER_BACKUP. */
static struct {
char *label; /* label used for DS endpoint lookups and RS refresh requests */
int minor; /* minor device number put in the DEVICE field of requests */
endpoint_t endpt; /* current endpoint of the driver, NONE if unknown */
int problem; /* one of BD_*; BD_NONE when nothing is pending */
int error; /* one of E*, only relevant if problem>0 */
int retries; /* retries counted since the last restart or reset_kills() */
int kills; /* restarts decided upon since the last reset_kills() */
} driver[2];
/* State variables. */
static endpoint_t self_ep; /* our own endpoint, obtained with getprocnr() */
static asynmsg_t amsgtable[2]; /* one asynchronous send slot per driver */
static int size_known = 0; /* nonzero once disk_size has been set */
static u64_t disk_size; /* partition size from the first DIOCGETP reply */
static int problem_stats[BD_LAST] = { 0 }; /* problem counters, indexed by BD_* */
/*===========================================================================*
* driver_open *
*===========================================================================*/
static int driver_open(int which)
{
/* Open the device on the given driver (DRIVER_MAIN or DRIVER_BACKUP) and
 * retrieve its partition size with a DIOCGETP ioctl. The first size
 * obtained is stored in disk_size; on later calls the reported size must
 * be equal to it. Returns OK on success and RET_REDO on any failure;
 * panics if the grant for the ioctl cannot be created. This is
 * unfinished code: we should never be doing a blocking sendrec() to
 * the driver.
 */
message msg;
cp_grant_id_t gid;
struct partition part;
sector_t sectors;
int r;
/* Step 1: DEV_OPEN on the driver's minor device. */
msg.m_type = DEV_OPEN;
msg.DEVICE = driver[which].minor;
msg.IO_ENDPT = self_ep;
r = sendrec(driver[which].endpt, &msg);
if (r != OK) {
/* Should we restart the driver now? */
printf("Filter: driver_open: sendrec returned %d\n", r);
return RET_REDO;
}
if(msg.m_type != TASK_REPLY || msg.REP_STATUS != OK) {
printf("Filter: driver_open: sendrec returned %d, %d\n",
msg.m_type, msg.REP_STATUS);
return RET_REDO;
}
/* Take the opportunity to retrieve the hard disk size. */
/* The driver gets write access to 'part' so it can fill in the result. */
gid = cpf_grant_direct(driver[which].endpt,
(vir_bytes) &part, sizeof(part), CPF_WRITE);
if(!GRANT_VALID(gid))
panic(__FILE__, "invalid grant", gid);
msg.m_type = DEV_IOCTL_S;
msg.REQUEST = DIOCGETP;
msg.DEVICE = driver[which].minor;
msg.IO_ENDPT = self_ep;
msg.IO_GRANT = (char *) gid;
r = sendrec(driver[which].endpt, &msg);
/* The grant is revoked regardless of the outcome of the request. */
cpf_revoke(gid);
if (r != OK || msg.m_type != TASK_REPLY || msg.REP_STATUS != OK) {
/* Not sure what to do here, either. */
printf("Filter: ioctl(DIOCGETP) returned (%d, %d)\n",
r, msg.m_type);
return RET_REDO;
}
if(!size_known) {
disk_size = part.size;
size_known = 1;
/* Convert to sectors and back; the size is rejected if the round trip
 * does not reproduce it exactly.
 * NOTE(review): size_known has already been set at this point, so a
 * later call takes the else branch and compares against this size.
 */
sectors = div64u(disk_size, SECTOR_SIZE);
if(cmp64(mul64u(sectors, SECTOR_SIZE), disk_size)) {
printf("Filter: partition too large\n");
return RET_REDO;
}
#if DEBUG
printf("Filter: partition size: 0x%s / %lu sectors\n",
print64(disk_size), sectors);
#endif
} else {
/* A size is already known; this driver must report the same one. */
if(cmp64(disk_size, part.size)) {
printf("Filter: partition size mismatch (%s != %s)\n",
print64(part.size), print64(disk_size));
return RET_REDO;
}
}
return OK;
}
/*===========================================================================*
* driver_close *
*===========================================================================*/
static int driver_close(int which)
{
	/* Send a DEV_CLOSE request for the given driver's minor device and
	 * wait for the reply. Returns OK if the driver acknowledged the
	 * close, RET_REDO if the call or the reply was not as expected.
	 */
	message req;
	int status;

	req.m_type = DEV_CLOSE;
	req.DEVICE = driver[which].minor;
	req.IO_ENDPT = self_ep;

	status = sendrec(driver[which].endpt, &req);

	if (status != OK) {
		/* Should we restart the driver now? */
		printf("Filter: driver_close: sendrec returned %d\n", status);

		return RET_REDO;
	}

	/* The only acceptable answer is a TASK_REPLY with status OK. */
	if (req.m_type == TASK_REPLY && req.REP_STATUS == OK)
		return OK;

	printf("Filter: driver_close: sendrec returned %d, %d\n",
		req.m_type, req.REP_STATUS);

	return RET_REDO;
}
/*===========================================================================*
* driver_init *
*===========================================================================*/
void driver_init(void)
{
	/* Set up the driver table: look up the endpoint of the main driver
	 * (and of the backup driver when mirroring is enabled) in DS and
	 * open the device on each. A failed lookup is handed to the regular
	 * recovery code; a failed open is fatal.
	 */
	int status;

	self_ep = getprocnr();

	/* Start from a clean table in which no endpoint is known yet. */
	memset(driver, 0, sizeof(driver));
	driver[DRIVER_MAIN].endpt = NONE;
	driver[DRIVER_BACKUP].endpt = NONE;

	/* Main driver. */
	driver[DRIVER_MAIN].label = MAIN_LABEL;
	driver[DRIVER_MAIN].minor = MAIN_MINOR;

	status = ds_retrieve_u32(driver[DRIVER_MAIN].label,
		(u32_t *) &driver[DRIVER_MAIN].endpt);

	if (status == OK) {
		if (driver_open(DRIVER_MAIN) != OK) {
			panic(__FILE__, "unhandled driver_open failure",
				NO_NUM);
		}
	} else {
		printf("Filter: failed to get main disk driver's endpoint: "
			"%d\n", status);
		bad_driver(DRIVER_MAIN, BD_DEAD, EFAULT);
		check_driver(DRIVER_MAIN);
	}

	/* Without mirroring there is no backup driver to set up. */
	if (!USE_MIRROR)
		return;

	/* Backup driver. */
	driver[DRIVER_BACKUP].label = BACKUP_LABEL;
	driver[DRIVER_BACKUP].minor = BACKUP_MINOR;

	if (strcmp(driver[DRIVER_MAIN].label,
		driver[DRIVER_BACKUP].label) == 0) {
		panic(__FILE__, "same driver: not tested", NO_NUM);
	}

	status = ds_retrieve_u32(driver[DRIVER_BACKUP].label,
		(u32_t *) &driver[DRIVER_BACKUP].endpt);

	if (status == OK) {
		if (driver_open(DRIVER_BACKUP) != OK) {
			panic(__FILE__, "unhandled driver_open failure",
				NO_NUM);
		}
	} else {
		printf("Filter: failed to get backup disk driver's "
			"endpoint: %d\n", status);
		bad_driver(DRIVER_BACKUP, BD_DEAD, EFAULT);
		check_driver(DRIVER_BACKUP);
	}
}
/*===========================================================================*
* driver_shutdown *
*===========================================================================*/
void driver_shutdown(void)
{
	/* Close the device on the main driver, and on the backup driver
	 * when mirroring is enabled. Failures are reported but otherwise
	 * ignored. With DEBUG set, the problem counters are printed first.
	 */
#if DEBUG
	printf("Filter: %u driver deaths, %u protocol errors, "
		"%u data errors\n", problem_stats[BD_DEAD],
		problem_stats[BD_PROTO], problem_stats[BD_DATA]);
#endif

	if (driver_close(DRIVER_MAIN) != OK) {
		printf("Filter: DEV_CLOSE failed on shutdown (1)\n");
	}

	if (USE_MIRROR && driver_close(DRIVER_BACKUP) != OK) {
		printf("Filter: DEV_CLOSE failed on shutdown (2)\n");
	}
}
/*===========================================================================*
* get_raw_size *
*===========================================================================*/
u64_t get_raw_size(void)
{
	/* Report the partition size recorded by driver_open(), i.e. the
	 * size of the raw disks underneath the filter driver.
	 */
	u64_t raw_size;

	raw_size = disk_size;

	return raw_size;
}
/*===========================================================================*
* reset_kills *
*===========================================================================*/
void reset_kills(void)
{
	/* Zero the retry and kill counters of both drivers. */
	int which;

	for (which = DRIVER_MAIN; which <= DRIVER_BACKUP; which++) {
		driver[which].kills = 0;
		driver[which].retries = 0;
	}
}
/*===========================================================================*
* bad_driver *
*===========================================================================*/
int bad_driver(int which, int type, int error)
{
/* A disk driver has died or produced an error. Mark it so that we can
* deal with it later, and return RET_REDO to indicate that the
* current operation is to be retried. Also store an error code to
* return to the user if the situation is unrecoverable.
*/
driver[which].problem = type;
driver[which].error = error;
return RET_REDO;
}
/*===========================================================================*
* new_driver_ep *
*===========================================================================*/
static int new_driver_ep(int which)
{
	/* Ask DS for the endpoint currently registered under the driver's
	 * label. If it differs from the endpoint we have on record, a new
	 * driver instance is up: store the new endpoint and return 1.
	 * Return 0 if the query fails or the endpoint is unchanged.
	 */
	endpoint_t fresh_ep;
	int status;

	status = ds_retrieve_u32(driver[which].label, (u32_t *) &fresh_ep);

	if (status != OK) {
		printf("Filter: DS query for %s failed\n",
			driver[which].label);

		return 0;
	}

	if (fresh_ep != driver[which].endpt) {
#if DEBUG
		printf("Filter: new enpdoint for %s: %d -> %d\n",
			driver[which].label, driver[which].endpt, fresh_ep);
#endif

		driver[which].endpt = fresh_ep;

		return 1;
	}

#if DEBUG
	printf("Filter: same endpoint for %s\n", driver[which].label);
#endif

	return 0;
}
/*===========================================================================*
* check_problem *
*===========================================================================*/
static int check_problem(int which, int problem, int retries, int *tell_rs)
{
/* A problem has occurred with a driver. Update statistics, and decide
 * what to do. If EAGAIN is returned, the driver should be restarted;
 * any other result will be passed up.
 *
 * which: DRIVER_MAIN or DRIVER_BACKUP.
 * problem: the BD_* type of the problem being handled.
 * retries: iteration count of the caller's recovery loop, starting at 1.
 * tell_rs: set to 1 if RS must be asked to refresh the driver and to 0 if
 * the driver is already dead; only written on the paths that reach the
 * switch statement below.
 *
 * Returns OK if operation can continue without a restart (this includes
 * the case where mirroring has just been disabled), EAGAIN if the driver
 * is to be restarted, or an error code if we give up.
 */
#if DEBUG
printf("Filter: check_driver processing driver %d, problem %d\n",
which, problem);
#endif
problem_stats[problem]++;
/* A new endpoint in DS means a new instance is already running. If the
 * device can be opened on it, no recovery is needed; if not, treat the
 * failed open as a protocol error of the new instance.
 */
if(new_driver_ep(which)) {
#if DEBUG
printf("Filter: check_problem: noticed a new driver\n");
#endif
if(driver_open(which) == OK) {
#if DEBUG2
printf("Filter: open OK -> no recovery\n");
#endif
return OK;
} else {
#if DEBUG2
printf("Filter: open not OK -> recovery\n");
#endif
problem = BD_PROTO;
problem_stats[problem]++;
}
}
/* If the driver has died, we always need to restart it. If it has
 * been giving problems, we first retry the request, up to N times,
 * after which we kill and restart the driver. We restart the driver
 * up to M times, after which we remove the driver from the mirror
 * configuration. If we are not set up to do mirroring, we can only
 * do one thing, and that is continue to limp along with the bad
 * driver..
 */
switch(problem) {
case BD_PROTO:
case BD_DATA:
driver[which].retries++;
#if DEBUG
printf("Filter: disk driver %d has had "
"%d/%d retry attempts, %d/%d kills\n", which,
driver[which].retries, NR_RETRIES,
driver[which].kills, NR_RESTARTS);
#endif
if (driver[which].retries < NR_RETRIES) {
/* Below the retry threshold: on the first pass of the caller's loop
 * simply let the request be retried. On later passes (the open on a
 * restarted driver failed) execution continues below and the driver
 * is restarted again.
 */
if(retries == 1) {
#if DEBUG
printf("Filter: not restarting; retrying "
"(retries %d/%d, kills %d/%d)\n",
driver[which].retries, NR_RETRIES,
driver[which].kills, NR_RESTARTS);
#endif
return OK;
}
#if DEBUG
printf("Filter: restarting (retries %d/%d, "
"kills %d/%d, internal retry %d)\n",
driver[which].retries, NR_RETRIES,
driver[which].kills, NR_RESTARTS, retries);
#endif
}
#if DEBUG
printf("Filter: disk driver %d has reached error "
"threshold, restarting driver\n", which);
#endif
/* The driver is still running, so RS has to be told to refresh it. */
*tell_rs = 1;
break;
case BD_DEAD:
/* Can't kill that which is already dead.. */
*tell_rs = 0;
break;
default:
panic(__FILE__, "invalid problem", problem);
}
/* At this point, the driver will be restarted. */
driver[which].retries = 0;
driver[which].kills++;
if (driver[which].kills < NR_RESTARTS)
return EAGAIN;
/* We've reached the maximum number of restarts for this driver. */
if (USE_MIRROR) {
printf("Filter: kill threshold reached, disabling mirroring\n");
USE_MIRROR = 0;
/* If the main driver is the one given up on, the backup driver takes
 * over its slot, so that slot DRIVER_MAIN always holds the driver
 * that remains in use.
 */
if (which == DRIVER_MAIN) {
driver[DRIVER_MAIN] = driver[DRIVER_BACKUP];
/* This is not necessary. */
strcpy(MAIN_LABEL, BACKUP_LABEL);
MAIN_MINOR = BACKUP_MINOR;
}
driver[DRIVER_BACKUP].endpt = NONE;
return OK;
}
else {
/* We tried, we really did. But now we give up. Tell the user.
 */
printf("Filter: kill threshold reached, returning error\n");
/* EAGAIN is reserved for "restart the driver"; never pass it up. */
if (driver[which].error == EAGAIN) return EIO;
return driver[which].error;
}
}
/*===========================================================================*
* restart_driver *
*===========================================================================*/
static void restart_driver(int which, int tell_rs)
{
	/* Get a new instance of the given driver going. If tell_rs is set,
	 * RS is first asked to refresh the driver; a failure of that request
	 * is fatal. After that, DS is polled, sleeping between attempts,
	 * until it reports an endpoint different from the one on record,
	 * which then becomes the driver's endpoint.
	 */
	message rs_req;
	endpoint_t fresh_ep;
	int status;

	if (tell_rs) {
		/* Tell RS to refresh or restart the driver */
		rs_req.m_type = RS_REFRESH;
		rs_req.RS_CMD_ADDR = driver[which].label;
		rs_req.RS_CMD_LEN = strlen(driver[which].label);

#if DEBUG
		printf("Filter: asking RS to refresh %s..\n",
			driver[which].label);
#endif

		status = sendrec(RS_PROC_NR, &rs_req);

		if (status != OK || rs_req.m_type != OK)
			panic(__FILE__, "RS request failed", status);

#if DEBUG
		printf("Filter: RS call succeeded\n");
#endif
	}

	/* Wait until the new driver instance is up, and get its endpoint. */
#if DEBUG
	printf("Filter: endpoint update driver %d; old endpoint %d\n",
		which, driver[which].endpt);
#endif

	for (;;) {
		status = ds_retrieve_u32(driver[which].label,
			(u32_t *) &fresh_ep);

#if DEBUG2
		if (status != OK)
			printf("Filter: DS request failed (%d)\n", status);
		else if (fresh_ep == driver[which].endpt)
			printf("Filter: DS returned same endpoint\n");
		else
			printf("Filter: DS request OK, new endpoint\n");
#endif

		/* Done once DS answers with an endpoint we have not seen. */
		if (status == OK && fresh_ep != driver[which].endpt)
			break;

		/* Not there yet; wait a bit before asking again. */
		flt_sleep(1);
	}

	driver[which].endpt = fresh_ep;
}
/*===========================================================================*
* check_driver *
*===========================================================================*/
int check_driver(int which)
{
	/* Handle a problem previously recorded for the given driver with
	 * bad_driver(), if any. Returns OK when nothing was wrong, when the
	 * request may simply be retried, or when the driver was restarted
	 * and reopened; otherwise returns check_problem()'s verdict.
	 */
	int problem, tell_rs;
	int verdict, attempt;

	problem = driver[which].problem;

	if (problem == BD_NONE)
		return OK;

	for (attempt = 1; ; attempt++) {
		/* Every pass after the first follows a failed open on a
		 * freshly restarted driver, which counts as a protocol error.
		 */
		if (attempt > 1) {
#if DEBUG
			printf("Filter: check_driver: retry number %d\n",
				attempt - 1);
#endif
			problem = BD_PROTO;
		}

		driver[which].problem = BD_NONE;

		/* Decide what to do: continue operation, restart the driver,
		 * or return an error.
		 */
		verdict = check_problem(which, problem, attempt, &tell_rs);

		if (verdict != EAGAIN)
			return verdict;

		/* Restarting the driver it is. First tell RS (if necessary),
		 * then wait for the new driver instance to come up.
		 */
		restart_driver(which, tell_rs);

		/* Finally, open the device on the new driver */
		if (driver_open(which) == OK)
			break;
	}

#if DEBUG
	printf("Filter: check_driver restarted driver %d, endpoint %d\n",
		which, driver[which].endpt);
#endif

	return OK;
}
/*===========================================================================*
* flt_senda *
*===========================================================================*/
static int flt_senda(message *mess, int which)
{
	/* Complete the given request message and send it asynchronously to
	 * one driver, using that driver's slot in the asynchronous message
	 * table. Panics if senda() fails, so only OK is ever returned.
	 */
	asynmsg_t *slot = &amsgtable[which];
	int status;

	/* Fill in the last bits of the message. */
	mess->DEVICE = driver[which].minor;
	mess->IO_ENDPT = self_ep;

	/* Prepare the slot and hand the table to the kernel. */
	slot->dst = driver[which].endpt;
	slot->msg = *mess;
	slot->flags = AMF_VALID;

	status = senda(amsgtable, 2);

	if (status != OK)
		panic(__FILE__, "senda returned error", status);

	return status;
}
/*===========================================================================*
* check_senda *
*===========================================================================*/
static int check_senda(int which)
{
	/* Classify the outcome of an earlier senda() to the given driver.
	 * Only when the asynchronous send has completed with a result that
	 * says the message never got delivered can we reliably conclude that
	 * the driver died; BD_DEAD is returned in that case and BD_PROTO in
	 * every other case.
	 */
	asynmsg_t *slot = &amsgtable[which];

	if (!(slot->flags & AMF_DONE))
		return BD_PROTO;

	if (slot->result == EDEADSRCDST || slot->result == EDSTDIED)
		return BD_DEAD;

	return BD_PROTO;
}
/*===========================================================================*
* flt_receive *
*===========================================================================*/
static int flt_receive(message *mess, int which)
{
/* Receive a message from one or either driver, unless a timeout
 * occurs. Can only return OK or RET_REDO.
 *
 * mess: receives the reply.
 * which: DRIVER_MAIN or DRIVER_BACKUP to wait for that specific driver,
 * or a negative value to accept a reply from either one.
 *
 * A clock notification counts as a timeout: the driver(s) waited for are
 * marked with bad_driver() and RET_REDO is returned. Messages from any
 * other source, and replies from the driver we are not waiting for, are
 * dropped.
 */
int r;
for (;;) {
r = receive(ANY, mess);
if(r != OK)
panic(__FILE__, "receive returned error", r);
if(mess->m_source == CLOCK && is_notify(mess->m_type)) {
/* NOTE(review): flt_alarm(-1) presumably returns the time at which the
 * current alarm was set, without changing it - confirm in util.c.
 */
if (mess->NOTIFY_TIMESTAMP < flt_alarm(-1)) {
#if DEBUG
printf("Filter: SKIPPING old alarm "
"notification\n");
#endif
continue;
}
#if DEBUG
printf("Filter: timeout waiting for disk driver %d "
"reply!\n", which);
#endif
/* If we're waiting for either driver,
 * both are at fault.
 */
if (which < 0) {
bad_driver(DRIVER_MAIN,
check_senda(DRIVER_MAIN), EFAULT);
return bad_driver(DRIVER_BACKUP,
check_senda(DRIVER_BACKUP), EFAULT);
}
/* Otherwise, just report the one not replying as dead.
 */
return bad_driver(which, check_senda(which), EFAULT);
}
/* Ignore anything that does not come from one of our two drivers. */
if (mess->m_source != driver[DRIVER_MAIN].endpt &&
mess->m_source != driver[DRIVER_BACKUP].endpt) {
#if DEBUG
printf("Filter: got STRAY message %d from %d\n",
mess->m_type, mess->m_source);
#endif
continue;
}
/* We are waiting for a reply from one specific driver. */
if (which >= 0) {
/* If the message source is that driver, good. */
if (mess->m_source == driver[which].endpt)
break;
/* This should probably be treated as a real protocol
 * error. We do not abort any receives (not even paired
 * receives) except because of timeouts. Getting here
 * means a driver replied at least the timeout period
 * later than expected, which should be enough reason
 * to kill it really. The other explanation is that it
 * is actually violating the protocol and sending bogus
 * messages...
 */
#if DEBUG
printf("Filter: got UNEXPECTED reply from %d\n",
mess->m_source);
#endif
continue;
}
/* We got a message from one of the drivers, and we didn't
 * care which one we wanted to receive from. A-OK.
 */
break;
}
return OK;
}
/*===========================================================================*
* flt_sendrec *
*===========================================================================*/
static int flt_sendrec(message *mess, int which)
{
	/* Send a request to one driver and wait for its reply, which
	 * overwrites *mess. If the asynchronous send already shows that the
	 * driver is dead, the driver is marked as such and RET_REDO is
	 * returned without waiting. The wait itself is bounded by an alarm
	 * of DRIVER_TIMEOUT.
	 */
	int status;

	status = flt_senda(mess, which);
	if (status != OK)
		return status;

	if (check_senda(which) == BD_DEAD)
		return bad_driver(which, BD_DEAD, EFAULT);

	/* Receive with the alarm armed; disarm it again afterwards. */
	flt_alarm(DRIVER_TIMEOUT);
	status = flt_receive(mess, which);
	flt_alarm(0);

	return status;
}
/*===========================================================================*
* do_sendrec_both *
*===========================================================================*/
static int do_sendrec_both(message *m1, message *m2)
{
/* Send request m1 to the main driver and request m2 to the backup
 * driver, and wait for both replies. On success, *m1 holds the main
 * driver's reply and *m2 the backup driver's reply.
 * This function will only return either OK or RET_REDO.
 */
int r, which = -1;
message ma, mb;
/* If the two disks use the same driver, call flt_sendrec() twice
 * sequentially. Such a setup is not very useful though.
 */
if (!strcmp(driver[DRIVER_MAIN].label, driver[DRIVER_BACKUP].label)) {
if ((r = flt_sendrec(m1, DRIVER_MAIN)) != OK) return r;
return flt_sendrec(m2, DRIVER_BACKUP);
}
/* If the two disks use different drivers, call flt_senda()
 * twice, and then flt_receive(), and distinguish the return
 * messages by means of m_source.
 */
if ((r = flt_senda(m1, DRIVER_MAIN)) != OK) return r;
if ((r = flt_senda(m2, DRIVER_BACKUP)) != OK) return r;
/* Set alarm. */
flt_alarm(DRIVER_TIMEOUT);
/* The message received by the 1st flt_receive() may not be
 * from DRIVER_MAIN.
 */
if ((r = flt_receive(&ma, -1)) != OK) {
flt_alarm(0);
return r;
}
/* The second receive must wait for whichever driver has not replied yet. */
if (ma.m_source == driver[DRIVER_MAIN].endpt) {
which = DRIVER_BACKUP;
} else if (ma.m_source == driver[DRIVER_BACKUP].endpt) {
which = DRIVER_MAIN;
} else {
panic(__FILE__, "message from unexpected source",
ma.m_source);
}
r = flt_receive(&mb, which);
/* Clear the alarm. */
flt_alarm(0);
if(r != OK)
return r;
/* Hand the replies back in main/backup order, whatever order they
 * arrived in.
 */
if (ma.m_source == driver[DRIVER_MAIN].endpt) {
*m1 = ma;
*m2 = mb;
} else {
*m1 = mb;
*m2 = ma;
}
return OK;
}
/*===========================================================================*
* do_sendrec_one *
*===========================================================================*/
static int do_sendrec_one(message *m1, message *m2)
{
	/* Exchange request m1 with the main driver only; m2 is not used.
	 * Problems are not handled here but recorded for later recovery.
	 * The result is either OK or RET_REDO.
	 */
	int status;

	status = flt_sendrec(m1, DRIVER_MAIN);

	return status;
}
/*===========================================================================*
* paired_sendrec *
*===========================================================================*/
static int paired_sendrec(message *m1, message *m2, int both)
{
	/* Exchange the prepared request(s) with the disk driver(s): with
	 * both drivers if 'both' is set, with the main driver only
	 * otherwise. Returns the result of that exchange.
	 */
	int status;

#if DEBUG2
	printf("paired_sendrec(%d) - <%d,%x:%x,%d> - %x,%x\n",
		both, m1->m_type, m1->HIGHPOS, m1->POSITION, m1->COUNT,
		m1->IO_GRANT, m2->IO_GRANT);
#endif

	status = both ? do_sendrec_both(m1, m2) : do_sendrec_one(m1, m2);

#if DEBUG2
	if (status != OK)
		printf("paired_sendrec about to return %d\n", status);
#endif

	return status;
}
/*===========================================================================*
* paired_grant *
*===========================================================================*/
static void paired_grant(char *buf1, char *buf2, size_t size, int request,
	cp_grant_id_t *gids, int both)
{
	/* Create the memory grants for an I/O request: one on buf1 for the
	 * main driver, stored in gids[0], and, if 'both' is set, one on buf2
	 * for the backup driver, stored in gids[1]. A grant is only created
	 * for a driver whose endpoint is greater than zero; the matching
	 * gids entry is left untouched otherwise. Panics on an invalid
	 * grant.
	 */
	cp_grant_id_t grant;
	int access;

	/* For a write the driver reads our buffer; for a read it writes. */
	if (request == FLT_WRITE)
		access = CPF_READ;
	else
		access = CPF_WRITE;

	if (driver[DRIVER_MAIN].endpt > 0) {
		grant = cpf_grant_direct(driver[DRIVER_MAIN].endpt,
			(vir_bytes) buf1, size, access);
		if (!GRANT_VALID(grant))
			panic(__FILE__, "invalid grant", grant);
		gids[0] = grant;
	}

	if (both && driver[DRIVER_BACKUP].endpt > 0) {
		grant = cpf_grant_direct(driver[DRIVER_BACKUP].endpt,
			(vir_bytes) buf2, size, access);
		if (!GRANT_VALID(grant))
			panic(__FILE__, "invalid grant", grant);
		gids[1] = grant;
	}
}
/*===========================================================================*
* paired_revoke *
*===========================================================================*/
static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both)
{
	/* Revoke the grant given to the main driver and, if 'both' is set,
	 * the one given to the backup driver as well.
	 */
	cpf_revoke(gid1);

	if (!both)
		return;

	cpf_revoke(gid2);
}
/*===========================================================================*
* read_write *
*===========================================================================*/
/* Perform one raw read or write on the disk driver(s).
 *
 * pos: byte position on the disk.
 * bufa: data buffer granted to the main driver.
 * bufb: data buffer granted to the backup driver; only used when both
 * drivers are addressed.
 * sizep: on entry, the number of bytes to transfer; on return, possibly
 * lowered to the number of bytes the driver(s) actually transferred.
 * request: FLT_WRITE, FLT_READ or FLT_READ2.
 *
 * Returns OK on success. Otherwise returns the result of the exchange with
 * the driver(s), or RET_REDO after marking the offending driver with
 * bad_driver() because its reply was invalid.
 */
int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
{
message m1, m2;
cp_grant_id_t gids[2];
int r, both;
gids[0] = gids[1] = GRANT_INVALID;
/* Send two requests only if mirroring is enabled and the given request
 * is either FLT_READ2 or FLT_WRITE.
 */
both = (USE_MIRROR && request != FLT_READ);
m1.m_type = (request == FLT_WRITE) ? DEV_WRITE_S : DEV_READ_S;
m1.COUNT = *sizep;
m1.POSITION = ex64lo(pos);
m1.HIGHPOS = ex64hi(pos);
/* The two requests are identical except for the grant set below. */
m2 = m1;
paired_grant(bufa, bufb, *sizep, request, gids, both);
m1.IO_GRANT = (char *) gids[0];
m2.IO_GRANT = (char *) gids[1];
r = paired_sendrec(&m1, &m2, both);
/* The grants are revoked whether or not the exchange succeeded. */
paired_revoke(gids[0], gids[1], both);
if(r != OK) {
#if DEBUG
if (r != RET_REDO)
printf("Filter: paired_sendrec returned %d\n", r);
#endif
return r;
}
/* Validate the main driver's reply: it must be a TASK_REPLY with a
 * non-negative status, the status being the number of bytes transferred.
 */
if (m1.m_type != TASK_REPLY || m1.REP_STATUS < 0) {
printf("Filter: unexpected/invalid reply from main driver: "
"(%x, %d)\n", m1.m_type, m1.REP_STATUS);
return bad_driver(DRIVER_MAIN, BD_PROTO,
(m1.m_type == TASK_REPLY) ? m1.REP_STATUS : EFAULT);
}
if (m1.REP_STATUS != *sizep) {
printf("Filter: truncated reply %u to I/O request of size "
"0x%x at 0x%s; size 0x%s\n",
m1.REP_STATUS, *sizep,
print64(pos), print64(disk_size));
/* If the driver returned a value *larger* than we requested,
 * OR if we did NOT exceed the disk size, then we should
 * report the driver for acting strangely!
 */
if (m1.REP_STATUS > *sizep ||
cmp64(add64u(pos, *sizep), disk_size) < 0)
return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);
/* Return the actual size. */
*sizep = m1.REP_STATUS;
}
/* Same validation for the backup driver's reply. Note that *sizep may
 * already have been lowered to the main driver's count above.
 */
if (both) {
if (m2.m_type != TASK_REPLY || m2.REP_STATUS < 0) {
printf("Filter: unexpected/invalid reply from "
"backup driver (%x, %d)\n",
m2.m_type, m2.REP_STATUS);
return bad_driver(DRIVER_BACKUP, BD_PROTO,
m2.m_type == TASK_REPLY ? m2.REP_STATUS :
EFAULT);
}
if (m2.REP_STATUS != *sizep) {
printf("Filter: truncated reply from backup driver\n");
/* As above */
if (m2.REP_STATUS > *sizep ||
cmp64(add64u(pos, *sizep), disk_size) < 0)
return bad_driver(DRIVER_BACKUP, BD_PROTO,
EFAULT);
/* Return the actual size. */
if (*sizep >= m2.REP_STATUS)
*sizep = m2.REP_STATUS;
}
}
return OK;
}

101
drivers/filter/inc.h Normal file
View file

@ -0,0 +1,101 @@
/* Filter driver - general include file */
#define _MINIX 1
#define _SYSTEM 1
#include <minix/config.h>
#include <minix/const.h>
#include <minix/type.h>
#include <minix/com.h>
#include <minix/ipc.h>
#include <sys/ioc_disk.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/partition.h>
#include <minix/ds.h>
#include <minix/callnr.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
/* Sector size in bytes, used to convert the partition size to sectors. */
#define SECTOR_SIZE 512
/* Checksum algorithms; values of SUM_TYPE. */
enum {
ST_XOR, /* XOR-based checksums */
ST_CRC, /* CRC32-based checksums */
ST_MD5 /* MD5-based checksums */
};
/* Request types passed to read_write(). */
enum {
FLT_WRITE, /* write to up to two disks */
FLT_READ, /* read from one disk */
FLT_READ2 /* read from both disks */
};
/* Something was wrong and the disk driver has been restarted/refreshed,
 * so the request needs to be redone.
 */
#define RET_REDO 1
/* The cases where the disk driver needs to be restarted/refreshed by RS.
 * BD_NONE: no problem is pending.
 * BD_DEAD: the disk driver has died. Restart it.
 * BD_PROTO: a protocol error has occurred. Refresh it.
 * BD_DATA: a data error has occurred. Refresh it.
 * BD_LAST: number of values above; used to size the statistics array.
 */
enum {
BD_NONE,
BD_DEAD,
BD_PROTO,
BD_DATA,
BD_LAST
};
/* Indices into the driver table. */
#define DRIVER_MAIN 0
#define DRIVER_BACKUP 1
/* Requests for more than this many bytes need to go through malloc(). */
#define BUF_SIZE (128 * 1024)
/* NOTE(review): twice BUF_SIZE; its users are not visible here. */
#define SBUF_SIZE (BUF_SIZE * 2)
/* Size of the MAIN_LABEL and BACKUP_LABEL buffers, in bytes. */
#define LABEL_SIZE 32
/* Type used for sector counts. */
typedef unsigned long sector_t;
/* main.c */
/* Global settings; defined with their defaults in main.c. */
extern int USE_CHECKSUM;
extern int USE_MIRROR;
extern int BAD_SUM_ERROR;
extern int USE_SUM_LAYOUT;
extern int SUM_TYPE;
extern int SUM_SIZE;
extern int NR_SUM_SEC;
extern int NR_RETRIES;
extern int NR_RESTARTS;
extern int DRIVER_TIMEOUT;
extern char MAIN_LABEL[LABEL_SIZE];
extern char BACKUP_LABEL[LABEL_SIZE];
extern int MAIN_MINOR;
extern int BACKUP_MINOR;
/* sum.c */
extern void sum_init(void);
extern int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw);
extern u64_t convert(u64_t size);
/* driver.c */
extern void driver_init(void);
extern void driver_shutdown(void);
extern u64_t get_raw_size(void);
extern void reset_kills(void);
extern int check_driver(int which);
extern int bad_driver(int which, int type, int error);
/* The last parameter is one of FLT_WRITE, FLT_READ or FLT_READ2. */
extern int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep,
int flag_rw);
/* util.c */
extern char *flt_malloc(size_t size, char *sbuf, size_t ssize);
extern void flt_free(char *buf, size_t size, char *sbuf);
extern char *print64(u64_t p);
extern clock_t flt_alarm(clock_t dt);
extern void flt_sleep(int secs);

446
drivers/filter/main.c Normal file
View file

@ -0,0 +1,446 @@
/* Filter driver - top layer - block interface */
/* This is a filter driver, which sits above the disk driver and forwards
 * messages between the disk driver and its callers. The filter can detect
 * corrupted data (toggled by USE_CHECKSUM) and recover it (toggled
 * by USE_MIRROR). These two functions are independent of each other.
 * The mirroring function requires two disks, on separate disk drivers.
 */
#include "inc.h"
#include "optset.h"
#define _POSIX_SOURCE 1
#include <signal.h>
/* Global settings. */
/* The values below are the defaults; each setting can be changed through
 * the option of the same purpose in optset_table.
 */
int USE_CHECKSUM = 0; /* enable checksumming */
int USE_MIRROR = 0; /* enable mirroring */
int BAD_SUM_ERROR = 1; /* bad checksums are considered a driver error */
int USE_SUM_LAYOUT = 0; /* use checksumming layout on disk */
int NR_SUM_SEC = 8; /* number of checksums per checksum sector */
int SUM_TYPE = 0; /* use XOR, CRC or MD5; one of ST_* */
int SUM_SIZE = 0; /* size of the stored checksum */
int NR_RETRIES = 3; /* number of times the request will be retried (N) */
int NR_RESTARTS = 3; /* number of times a driver will be restarted (M) */
int DRIVER_TIMEOUT = 5; /* timeout in seconds to declare a driver dead (T) */
char MAIN_LABEL[LABEL_SIZE] = ""; /* main disk driver label */
char BACKUP_LABEL[LABEL_SIZE] = ""; /* backup disk driver label */
int MAIN_MINOR = -1; /* main partition minor nr */
int BACKUP_MINOR = -1; /* backup partition minor nr */
/* Option table handed to optset_parse(). Each entry names an option, its
 * type, the global that receives it, and a type-dependent value: the buffer
 * size for OPT_STRING, the numeric base for OPT_INT, and the value to store
 * for OPT_BOOL. The checksum type options use OPT_BOOL to store an ST_
 * constant into SUM_TYPE. The table ends with an entry whose name is NULL.
 */
struct optset optset_table[] = {
{ "label0", OPT_STRING, MAIN_LABEL, LABEL_SIZE },
{ "label1", OPT_STRING, BACKUP_LABEL, LABEL_SIZE },
{ "minor0", OPT_INT, &MAIN_MINOR, 10 },
{ "minor1", OPT_INT, &BACKUP_MINOR, 10 },
{ "sum_sec", OPT_INT, &NR_SUM_SEC, 10 },
{ "layout", OPT_BOOL, &USE_SUM_LAYOUT, 1 },
{ "nolayout", OPT_BOOL, &USE_SUM_LAYOUT, 0 },
{ "sum", OPT_BOOL, &USE_CHECKSUM, 1 },
{ "nosum", OPT_BOOL, &USE_CHECKSUM, 0 },
{ "mirror", OPT_BOOL, &USE_MIRROR, 1 },
{ "nomirror", OPT_BOOL, &USE_MIRROR, 0 },
{ "xor", OPT_BOOL, &SUM_TYPE, ST_XOR },
{ "crc", OPT_BOOL, &SUM_TYPE, ST_CRC },
{ "md5", OPT_BOOL, &SUM_TYPE, ST_MD5 },
{ "sumerr", OPT_BOOL, &BAD_SUM_ERROR, 1 },
{ "nosumerr", OPT_BOOL, &BAD_SUM_ERROR, 0 },
{ "retries", OPT_INT, &NR_RETRIES, 10 },
{ "N", OPT_INT, &NR_RETRIES, 10 },
{ "restarts", OPT_INT, &NR_RESTARTS, 10 },
{ "M", OPT_INT, &NR_RESTARTS, 10 },
{ "timeout", OPT_INT, &DRIVER_TIMEOUT, 10 },
{ "T", OPT_INT, &DRIVER_TIMEOUT, 10 },
{ NULL }
};
/* Request message. The three fields below are extracted from it by main()
 * for every incoming request, before the request is dispatched.
 */
static message m_in;
static endpoint_t who_e; /* m_source */
static endpoint_t proc_e; /* IO_ENDPT */
static cp_grant_id_t grant_id; /* IO_GRANT */
/* Data buffers. 'buf_array' is allocated once in main(); 'buffer' is obtained
 * per request through flt_malloc() and released with flt_free().
 */
static char *buf_array, *buffer; /* contiguous buffer */
/*===========================================================================*
* carry *
*===========================================================================*/
static int carry(size_t size, int flag_rw)
{
/* Carry data between caller proc and filter.
*/
if (flag_rw == FLT_WRITE)
return sys_safecopyfrom(proc_e, grant_id, 0,
(vir_bytes) buffer, size, D);
else
return sys_safecopyto(proc_e, grant_id, 0,
(vir_bytes) buffer, size, D);
}
/*===========================================================================*
* vcarry *
*===========================================================================*/
static int vcarry(int grants, iovec_t *iov, int flag_rw, size_t size)
{
/* Copy up to 'size' bytes between the caller's grant vector and the
 * filter's contiguous buffer, one vector element at a time. For FLT_WRITE
 * the data flows from the caller into the buffer; otherwise it flows from
 * the buffer to the caller. Stops at the first failing copy and returns its
 * result; returns OK otherwise.
 */
	char *cursor = buffer;
	size_t chunk;
	int idx, result;

	for (idx = 0; idx < grants; idx++) {
		if (size == 0)
			break;

		/* Never copy more than this element holds. */
		chunk = MIN(size, iov[idx].iov_size);

		result = (flag_rw == FLT_WRITE)
			? sys_safecopyfrom(proc_e, (vir_bytes) iov[idx].iov_addr,
				0, (vir_bytes) cursor, chunk, D)
			: sys_safecopyto(proc_e, (vir_bytes) iov[idx].iov_addr,
				0, (vir_bytes) cursor, chunk, D);

		if (result != OK)
			return result;

		cursor += chunk;
		size -= chunk;
	}

	return OK;
}
/*===========================================================================*
* do_rdwt *
*===========================================================================*/
static int do_rdwt(int flag_rw)
{
/* Handle a single-grant read or write request (DEV_READ_S/DEV_WRITE_S).
 * 'flag_rw' is FLT_READ or FLT_WRITE. Returns the number of bytes
 * transferred on success, EINVAL for an unaligned request, or the error
 * from the data copy, transfer() or check_driver() otherwise.
 */
	size_t size, size_ret;
	u64_t pos;
	int r;

	pos = make64(m_in.POSITION, m_in.HIGHPOS);
	size = m_in.COUNT;

	/* Both the position and the size must be sector aligned. */
	if (rem64u(pos, SECTOR_SIZE) != 0 || size % SECTOR_SIZE != 0) {
		printf("Filter: unaligned request from caller!\n");
		return EINVAL;
	}

	buffer = flt_malloc(size, buf_array, BUF_SIZE);

	/* For a write, fetch the caller's data first. If that fails, the
	 * buffer does not hold the caller's data and must not reach the disk.
	 */
	if (flag_rw == FLT_WRITE && (r = carry(size, flag_rw)) != OK) {
		flt_free(buffer, size, buf_array);
		return r;
	}

	reset_kills();

	/* Retry the transfer for as long as it asks for a redo and both
	 * drivers check out.
	 */
	for (;;) {
		size_ret = size;
		r = transfer(pos, buffer, &size_ret, flag_rw);
		if(r != RET_REDO)
			break;
#if DEBUG
		printf("Filter: transfer yielded RET_REDO, checking drivers\n");
#endif
		if((r = check_driver(DRIVER_MAIN)) != OK) break;
		if((r = check_driver(DRIVER_BACKUP)) != OK) break;
	}

	/* For a read, hand the data to the caller; a failed copy fails the
	 * request rather than being reported as success.
	 */
	if(r == OK && flag_rw == FLT_READ)
		r = carry(size_ret, flag_rw);

	flt_free(buffer, size, buf_array);
	return r != OK ? r : size_ret;
}
/*===========================================================================*
* do_vrdwt *
*===========================================================================*/
static int do_vrdwt(int flag_rw)
{
/* Handle a vectored read or write request (DEV_GATHER_S/DEV_SCATTER_S).
 * 'flag_rw' is FLT_READ or FLT_WRITE. On success, the caller's vector is
 * copied back with each element's size reduced by the number of bytes
 * transferred for it, and OK is returned. Returns EINVAL for a bad vector
 * size or an unaligned request, or the error from the data copy,
 * transfer() or check_driver() otherwise.
 */
	size_t size, size_ret, bytes;
	int grants;
	int r, i;
	u64_t pos;
	iovec_t iov_proc[NR_IOREQS];

	/* Extract the request parameters. The element count comes from the
	 * caller and must fit the local vector, or the copy below would
	 * overrun the stack.
	 */
	grants = m_in.COUNT;
	if (grants < 0 || grants > NR_IOREQS) {
		printf("Filter: bad I/O vector size from caller!\n");
		return EINVAL;
	}

	if((r = sys_safecopyfrom(who_e, grant_id, 0, (vir_bytes) iov_proc,
		grants * sizeof(iovec_t), D)) != OK) {
		panic(__FILE__, "copying in grant vector failed", r);
	}

	pos = make64(m_in.POSITION, m_in.HIGHPOS);
	for(size = 0, i = 0; i < grants; i++)
		size += iov_proc[i].iov_size;

	/* Both the position and the total size must be sector aligned. */
	if (rem64u(pos, SECTOR_SIZE) != 0 || size % SECTOR_SIZE != 0) {
		printf("Filter: unaligned request from caller!\n");
		return EINVAL;
	}

	buffer = flt_malloc(size, buf_array, BUF_SIZE);

	/* For a write, gather the caller's data first. If that fails, the
	 * buffer does not hold the caller's data and must not reach the disk.
	 */
	if(flag_rw == FLT_WRITE &&
		(r = vcarry(grants, iov_proc, flag_rw, size)) != OK) {
		flt_free(buffer, size, buf_array);
		return r;
	}

	reset_kills();

	/* Retry the transfer for as long as it asks for a redo and both
	 * drivers check out.
	 */
	for (;;) {
		size_ret = size;
		r = transfer(pos, buffer, &size_ret, flag_rw);
		if(r != RET_REDO)
			break;
#if DEBUG
		printf("Filter: transfer yielded RET_REDO, checking drivers\n");
#endif
		if((r = check_driver(DRIVER_MAIN)) != OK) break;
		if((r = check_driver(DRIVER_BACKUP)) != OK) break;
	}

	if(r != OK) {
		flt_free(buffer, size, buf_array);
		return r;
	}

	/* For a read, scatter the data to the caller. */
	if(flag_rw == FLT_READ &&
		(r = vcarry(grants, iov_proc, flag_rw, size_ret)) != OK) {
		flt_free(buffer, size, buf_array);
		return r;
	}

	/* Set the result-iovec. */
	for(i = 0; i < grants && size_ret > 0; i++) {
		bytes = MIN(size_ret, iov_proc[i].iov_size);
		iov_proc[i].iov_size -= bytes;
		size_ret -= bytes;
	}

	/* Copy the caller's grant-table back. */
	if((r = sys_safecopyto(who_e, grant_id, 0, (vir_bytes) iov_proc,
		grants * sizeof(iovec_t), D)) != OK) {
		panic(__FILE__, "copying out grant vector failed", r);
	}

	flt_free(buffer, size, buf_array);
	return OK;
}
/*===========================================================================*
* do_ioctl *
*===========================================================================*/
static int do_ioctl(message *m)
{
/* Handle an ioctl request. Only DIOCGETP is supported: it copies a
 * zeroed partition structure, with just the size filled in, to the caller.
 * Returns OK on success, EIO if that copy fails, and EINVAL for every other
 * request.
 */
	struct partition sizepart;

	switch(m->REQUEST) {
	case DIOCGETP:
		memset(&sizepart, 0, sizeof(sizepart));

		/* The presented disk size is the raw partition size,
		 * corrected for space needed for checksums.
		 */
		sizepart.size = convert(get_raw_size());

		if(sys_safecopyto(proc_e, (vir_bytes) grant_id, 0,
			(vir_bytes) &sizepart,
			sizeof(struct partition), D) == OK)
			return OK;

		printf("Filter: DIOCGETP safecopyto failed\n");
		return EIO;

	case DIOCSETP:
	case DIOCTIMEOUT:
	case DIOCOPENCT:
		/* These do not make sense for us. */
		return EINVAL;

	default:
		printf("Filter: unknown ioctl request: %d!\n", m->REQUEST);
		return EINVAL;
	}
}
/*===========================================================================*
* parse_arguments *
*===========================================================================*/
static int parse_arguments(int argc, char *argv[])
{
/* Parse the single option-string argument into the global settings and
 * validate them. Returns OK on success or EINVAL when the argument count
 * is wrong or a setting is missing or out of range. On success, SUM_SIZE
 * is set for the chosen checksum type and DRIVER_TIMEOUT is converted from
 * seconds to clock ticks.
 */
	if(argc != 2)
		return EINVAL;

	optset_parse(optset_table, argv[1]);

	/* A main driver label and a valid minor number are mandatory; the
	 * backup ones only when mirroring.
	 */
	if (MAIN_LABEL[0] == 0 || MAIN_MINOR < 0 || MAIN_MINOR > 255)
		return EINVAL;
	if (USE_MIRROR && (BACKUP_LABEL[0] == 0 ||
		BACKUP_MINOR < 0 || BACKUP_MINOR > 255))
		return EINVAL;

	/* Checksumming implies a checksum layout. */
	if (USE_CHECKSUM)
		USE_SUM_LAYOUT = 1;

	/* Determine the checksum size for the chosen checksum type. */
	switch (SUM_TYPE) {
	case ST_XOR:
		SUM_SIZE = 16;	/* compatibility */
		break;
	case ST_CRC:
		SUM_SIZE = 4;
		break;
	case ST_MD5:
		SUM_SIZE = 16;
		break;
	default:
		return EINVAL;
	}

	/* All checksums of a group must fit in one sector. Divide rather
	 * than multiply: NR_SUM_SEC is user supplied, and the product could
	 * overflow and slip past the check. SUM_SIZE is nonzero here.
	 */
	if (NR_SUM_SEC <= 0 || NR_SUM_SEC > SECTOR_SIZE / SUM_SIZE)
		return EINVAL;

#if DEBUG
	printf("Filter starting. Configuration:\n");
	printf(" USE_CHECKSUM : %3s ", USE_CHECKSUM ? "yes" : "no");
	printf(" USE_MIRROR : %3s\n", USE_MIRROR ? "yes" : "no");
	if (USE_CHECKSUM) {
		printf(" BAD_SUM_ERROR : %3s ",
			BAD_SUM_ERROR ? "yes" : "no");
		printf(" NR_SUM_SEC : %3d\n", NR_SUM_SEC);
		printf(" SUM_TYPE : ");
		switch (SUM_TYPE) {
		case ST_XOR: printf("xor"); break;
		case ST_CRC: printf("crc"); break;
		case ST_MD5: printf("md5"); break;
		}
		printf(" SUM_SIZE : %3d\n", SUM_SIZE);
	}
	else printf(" USE_SUM_LAYOUT : %3s\n", USE_SUM_LAYOUT ? "yes" : "no");
	printf(" N : %3dx M : %3dx T : %3ds\n",
		NR_RETRIES, NR_RESTARTS, DRIVER_TIMEOUT);
	printf(" MAIN_LABEL / MAIN_MINOR : %19s / %d\n",
		MAIN_LABEL, MAIN_MINOR);
	if (USE_MIRROR) {
		printf(" BACKUP_LABEL / BACKUP_MINOR : %15s / %d\n",
			BACKUP_LABEL, BACKUP_MINOR);
	}
#endif

	/* Convert timeout seconds to ticks. */
	DRIVER_TIMEOUT *= sys_hz();

	return OK;
}
/*===========================================================================*
* got_signal *
*===========================================================================*/
static void got_signal(void)
{
/* Called when PM notifies us. If the pending signal set can be obtained
 * and contains SIGTERM, shut down the driver layer and exit; otherwise
 * return without doing anything.
 */
	sigset_t pending;

	if (getsigset(&pending) == 0 && sigismember(&pending, SIGTERM)) {
#if DEBUG
		printf("Filter: shutdown...\n");
#endif
		driver_shutdown();
		exit(0);
	}
}
/*===========================================================================*
* main *
*===========================================================================*/
int main(int argc, char *argv[])
{
/* Filter driver entry point: parse the option string, allocate the
 * contiguous request buffer, initialize the checksum and driver layers,
 * and then serve block requests forever. Returns 1 if the arguments are
 * invalid.
 */
	message m_out;
	int r;

	r = parse_arguments(argc, argv);
	if(r != OK) {
		printf("Filter: wrong argument!\n");
		return 1;
	}

	if ((buf_array = alloc_contig(BUF_SIZE, 0, NULL)) == NULL)
		panic(__FILE__, "no memory available", NO_NUM);

	sum_init();
	driver_init();

	for (;;) {
		/* Wait for request. */
		if(receive(ANY, &m_in) != OK) {
			panic(__FILE__, "receive failed", NO_NUM);
		}

#if DEBUG2
		printf("Filter: got request %d from %d\n",
			m_in.m_type, m_in.m_source);
#endif

		/* A notification from PM announces a signal. It is not a
		 * device request, so do not pass it to the dispatcher below.
		 */
		if (is_notify(m_in.m_type) && m_in.m_source == PM_PROC_NR) {
			got_signal();
			continue;
		}

		who_e = m_in.m_source;
		proc_e = m_in.IO_ENDPT;
		grant_id = (cp_grant_id_t) m_in.IO_GRANT;

		/* Forward the request message to the drivers. */
		switch(m_in.m_type) {
		case DEV_OPEN:		/* open/close is a noop for filter. */
		case DEV_CLOSE:		r = OK;				break;
		case DEV_READ_S:	r = do_rdwt(FLT_READ);		break;
		case DEV_WRITE_S:	r = do_rdwt(FLT_WRITE);		break;
		case DEV_GATHER_S:	r = do_vrdwt(FLT_READ);		break;
		case DEV_SCATTER_S:	r = do_vrdwt(FLT_WRITE);	break;
		case DEV_IOCTL_S:	r = do_ioctl(&m_in);		break;
		default:
			printf("Filter: ignoring unknown request %d from %d\n",
				m_in.m_type, m_in.m_source);
			continue;
		}

#if DEBUG2
		printf("Filter: replying with code %d\n", r);
#endif

		/* Send back reply message. */
		m_out.m_type = TASK_REPLY;
		m_out.REP_ENDPT = proc_e;
		m_out.REP_STATUS = r;

		/* A lost reply leaves the caller waiting; at least say so. */
		if ((r = send(who_e, &m_out)) != OK)
			printf("Filter: unable to send reply to %d: %d\n",
				who_e, r);
	}

	return 0;
}

315
drivers/filter/md5.c Normal file
View file

@ -0,0 +1,315 @@
/*
* This code implements the MD5 message-digest algorithm.
* The algorithm is due to Ron Rivest. This code was
* written by Colin Plumb in 1993, no copyright is claimed.
* This code is in the public domain; do with it what you wish.
*
* Equivalent code is available from RSA Data Security, Inc.
* This code has been tested against that, and is equivalent,
* except that you don't need to include two pages of legalese
* with every copy.
*
* To compute the message digest of a chunk of bytes, declare an
* MD5Context structure, pass it to MD5Init, call MD5Update as
* needed on buffers full of bytes, and then call MD5Final, which
* will fill a supplied 16-byte array with the digest.
*/
/* This code was modified in 1997 by Jim Kingdon of Cyclic Software to
not require an integer type which is exactly 32 bits. This work
draws on the changes for the same purpose by Tatu Ylonen
<ylo@cs.hut.fi> as part of SSH, but since I didn't actually use
that code, there is no copyright issue. I hereby disclaim
copyright in any changes I have made; this code remains in the
public domain. */
#ifdef TEST
#include <stdlib.h>
#endif
#include <string.h> /* for memcpy() and memset() */
#include "md5.h"
/* Little-endian byte-swapping routines. Note that these do not
depend on the size of datatypes such as uint32, nor do they require
us to detect the endianness of the machine we are running on. It
is possible they should be macros for speed, but I would be
surprised if they were a performance bottleneck for MD5. */
static uint32
getu32 (const unsigned char *addr)
{
	/* Assemble a 32-bit value from four bytes stored least significant
	   byte first, starting with the most significant one.  */
	unsigned long value;

	value = (unsigned long)addr[3];
	value = (value << 8) | addr[2];
	value = (value << 8) | addr[1];
	value = (value << 8) | addr[0];
	return value;
}
static void
putu32 (uint32 data, unsigned char *addr)
{
	/* Store the low 32 bits of 'data' as four bytes, least significant
	   byte first.  */
	int i;

	for (i = 0; i < 4; i++) {
		addr[i] = (unsigned char)data;
		data >>= 8;
	}
}
/*
* Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
* initialization constants.
*/
void
MD5Init (ctx)
struct MD5Context *ctx;
{
ctx->buf[0] = 0x67452301;
ctx->buf[1] = 0xefcdab89;
ctx->buf[2] = 0x98badcfe;
ctx->buf[3] = 0x10325476;
ctx->bits[0] = 0;
ctx->bits[1] = 0;
}
/*
 * Feed 'len' more bytes from 'buf' into the digest.  Complete 64-byte
 * blocks are run through MD5Transform; any leftover bytes are kept in
 * ctx->in until more data arrives or MD5Final is called.
 */
void
MD5Update (struct MD5Context *ctx, unsigned char const *buf, unsigned len)
{
	uint32 old_bits, fill, room;

	/* Advance the 64-bit bit count, carrying from the low word. */
	old_bits = ctx->bits[0];
	ctx->bits[0] = (old_bits + ((uint32)len << 3)) & 0xffffffff;
	if (ctx->bits[0] < old_bits)
		ctx->bits[1]++;
	ctx->bits[1] += len >> 29;

	/* Number of bytes already waiting in ctx->in. */
	fill = (old_bits >> 3) & 0x3f;

	/* Top up a partially filled block first. */
	if (fill != 0) {
		room = 64 - fill;
		if (len < room) {
			memcpy(ctx->in + fill, buf, len);
			return;
		}
		memcpy(ctx->in + fill, buf, room);
		MD5Transform (ctx->buf, ctx->in);
		buf += room;
		len -= room;
	}

	/* Consume all complete blocks. */
	for (; len >= 64; buf += 64, len -= 64) {
		memcpy(ctx->in, buf, 64);
		MD5Transform (ctx->buf, ctx->in);
	}

	/* Keep the tail for later. */
	memcpy(ctx->in, buf, len);
}
/*
* Final wrapup - pad to 64-byte boundary with the bit pattern
* 1 0* (64-bit count of bits processed, MSB-first)
*/
void
MD5Final (digest, ctx)
unsigned char digest[16];
struct MD5Context *ctx;
{
unsigned count;
unsigned char *p;
/* Compute number of bytes mod 64 */
count = (ctx->bits[0] >> 3) & 0x3F;
/* Set the first char of padding to 0x80. This is safe since there is
always at least one byte free */
p = ctx->in + count;
*p++ = 0x80;
/* Bytes of padding needed to make 64 bytes */
count = 64 - 1 - count;
/* Pad out to 56 mod 64 */
if (count < 8) {
/* Two lots of padding: Pad the first block to 64 bytes */
memset(p, 0, count);
MD5Transform (ctx->buf, ctx->in);
/* Now fill the next block with 56 bytes */
memset(ctx->in, 0, 56);
} else {
/* Pad block to 56 bytes */
memset(p, 0, count-8);
}
/* Append length in bits and transform */
putu32(ctx->bits[0], ctx->in + 56);
putu32(ctx->bits[1], ctx->in + 60);
MD5Transform (ctx->buf, ctx->in);
putu32(ctx->buf[0], digest);
putu32(ctx->buf[1], digest + 4);
putu32(ctx->buf[2], digest + 8);
putu32(ctx->buf[3], digest + 12);
memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */
}
#ifndef ASM_MD5
/* The four core functions - F1 is optimized somewhat. Each combines three
   words bitwise; F2 is F1 with its arguments rotated. The macro arguments
   are not parenthesized, so pass plain variables only. */
/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))
/* This is the central step in the MD5 algorithm: add f(x, y, z) and 'data'
   to w, truncate w to 32 bits (uint32 may be wider), rotate it left by s
   bits, and add x. 'w' is evaluated and modified several times. */
#define MD5STEP(f, w, x, y, z, data, s) \
( w += f(x, y, z) + data, w &= 0xffffffff, w = w<<s | w>>(32-s), w += x )
/*
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data. MD5Update blocks
 * the data and converts bytes into longwords for this routine.
 *
 * 'buf' holds the four chaining words and is updated in place; 'inraw' is
 * one 64-byte input block, decoded here with getu32() as 16 words stored
 * least significant byte first.
 */
void
MD5Transform (buf, inraw)
uint32 buf[4];
const unsigned char inraw[64];
{
register uint32 a, b, c, d;
uint32 in[16];
int i;
/* Decode the input block into 16 words. */
for (i = 0; i < 16; ++i)
in[i] = getu32 (inraw + 4 * i);
/* Work on a copy of the chaining words. */
a = buf[0];
b = buf[1];
c = buf[2];
d = buf[3];
/* Round 1: sixteen steps using F1. */
MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7);
MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12);
MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17);
MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22);
MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7);
MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12);
MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17);
MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22);
MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7);
MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12);
MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17);
MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22);
MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7);
MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12);
MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17);
MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22);
/* Round 2: sixteen steps using F2. */
MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5);
MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9);
MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14);
MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20);
MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5);
MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9);
MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14);
MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20);
MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5);
MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9);
MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14);
MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20);
MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5);
MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9);
MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14);
MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20);
/* Round 3: sixteen steps using F3. */
MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4);
MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11);
MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16);
MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23);
MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4);
MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11);
MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16);
MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23);
MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4);
MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11);
MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16);
MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23);
MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4);
MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11);
MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16);
MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23);
/* Round 4: sixteen steps using F4. */
MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6);
MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10);
MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15);
MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21);
MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6);
MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10);
MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15);
MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21);
MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6);
MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10);
MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15);
MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21);
MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6);
MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10);
MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15);
MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21);
/* Fold the result back into the chaining words. */
buf[0] += a;
buf[1] += b;
buf[2] += c;
buf[3] += d;
}
#endif
#ifdef TEST
/* Simple test program. Can use it to manually run the tests from
RFC1321 for example. */
#include <stdio.h>
int
main (int argc, char **argv)
{
	/* Print the MD5 digest, in hexadecimal, of every command line
	   argument.  Exits with status 1 if no argument is given.  */
	struct MD5Context context;
	unsigned char checksum[16];
	int arg, byte;

	if (argc < 2) {
		fprintf (stderr, "usage: %s string-to-hash\n", argv[0]);
		exit (1);
	}

	arg = 1;
	while (arg < argc) {
		printf ("MD5 (\"%s\") = ", argv[arg]);

		MD5Init (&context);
		MD5Update (&context, (unsigned char *)argv[arg],
			   strlen (argv[arg]));
		MD5Final (checksum, &context);

		for (byte = 0; byte < 16; byte++)
			printf ("%02x", (unsigned int) checksum[byte]);
		printf ("\n");

		arg++;
	}

	return 0;
}
#endif /* TEST */

26
drivers/filter/md5.h Normal file
View file

@ -0,0 +1,26 @@
/* See md5.c for explanation and copyright information. */
#ifndef MD5_H
#define MD5_H
/* Unlike previous versions of this code, uint32 need not be exactly
32 bits, merely 32 bits or more. Choosing a data type which is 32
bits instead of 64 is not important; speed is considerably more
important. ANSI guarantees that "unsigned long" will be big enough,
and always using it seems to have few disadvantages. */
typedef unsigned long uint32;
/* State of one MD5 computation. */
struct MD5Context {
uint32 buf[4]; /* the four chaining words */
uint32 bits[2]; /* number of bits processed so far: low word, high word */
unsigned char in[64]; /* input bytes not yet forming a complete block */
};
/* Start a new computation in 'context'. */
void MD5Init(struct MD5Context *context);
/* Add 'len' bytes from 'buf' to the computation. */
void MD5Update(struct MD5Context *context,
unsigned char const *buf, unsigned len);
/* Finish the computation and store the 16-byte result in 'digest'. */
void MD5Final(unsigned char digest[16],
struct MD5Context *context);
/* Process one 64-byte block 'in', updating the chaining words in 'buf'. */
void MD5Transform(uint32 buf[4], const unsigned char in[64]);
#endif /* !MD5_H */

128
drivers/filter/optset.c Normal file
View file

@ -0,0 +1,128 @@
/* This file provides functionality to parse strings of comma-separated
* options, each being either a single key name or a key=value pair, where the
* value may be enclosed in quotes. A table of optset entries is provided to
* determine which options are recognized, how to parse their values, and where
* to store those. Unrecognized options are silently ignored; improperly
* formatted options are silently set to reasonably acceptable values.
*
* The entry points into this file are:
* optset_parse parse the given options string using the given table
*
* Created:
* May 2009 (D.C. van Moolenbroek)
*/
#define _MINIX 1
#include <stdlib.h>
#include <string.h>
#include <minix/config.h>
#include <minix/const.h>
#include "optset.h"
FORWARD _PROTOTYPE( void optset_parse_entry, (struct optset *entry,
char *ptr, int len) );
/*===========================================================================*
* optset_parse_entry *
*===========================================================================*/
PRIVATE void optset_parse_entry(entry, ptr, len)
struct optset *entry;
char *ptr;
int len;
{
/* Store the value of one recognized option in the variable the table entry
 * points to. 'ptr' and 'len' delimit the value text; 'len' is zero when the
 * option has no value. Entries of an unknown type are left alone.
 */
  char *str;
  int *num;

  if (entry->os_type == OPT_BOOL) {
	/* The presence of the option selects the preset value. */
	num = (int *) entry->os_ptr;
	*num = entry->os_val;
  }
  else if (entry->os_type == OPT_STRING) {
	/* Truncate to the destination size, leaving room for the null. */
	if (len >= entry->os_val)
		len = entry->os_val - 1;

	str = (char *) entry->os_ptr;
	if (len > 0)
		memcpy(str, ptr, len);
	str[len] = 0;
  }
  else if (entry->os_type == OPT_INT) {
	/* An absent or empty value yields zero. */
	num = (int *) entry->os_ptr;
	*num = (len > 0) ? strtol(ptr, NULL, entry->os_val) : 0;
  }
}
/*===========================================================================*
* optset_parse *
*===========================================================================*/
PUBLIC void optset_parse(table, string)
struct optset *table;
char *string;
{
/* Walk through a comma-separated option string. For every "key" or
 * "key=value" field, look up the key in the given table (ignoring case) and
 * store the value through optset_parse_entry(). Unknown keys are skipped.
 */
  char *cur, *key, *value;
  char quote;
  int idx, keylen, vallen;

  cur = string;

  while (*cur) {
	/* The key runs up to the next '=', ',' or the end of the string. */
	key = cur;
	keylen = 0;
	while (*cur && *cur != '=' && *cur != ',') {
		cur++;
		keylen++;
	}

	value = NULL;
	vallen = 0;

	if (*cur == '=') {
		cur++;

		if (*cur == '\'' || *cur == '"') {
			/* Quoted value: it ends at a matching quote that is
			 * followed by a comma or the end of the string. If
			 * there is none, it runs to the end of the string.
			 */
			quote = *cur;
			cur++;
			value = cur;

			while (*cur && (*cur != quote ||
				(cur[1] && cur[1] != ','))) {
				cur++;
				vallen++;
			}

			/* Step over the closing quote. */
			if (*cur) cur++;
		}
		else {
			/* Plain value: up to the next comma or the end. */
			value = cur;

			while (*cur && *cur != ',') {
				cur++;
				vallen++;
			}
		}
	}

	if (*cur == ',') cur++;

	/* Hand the value to the first table entry with this exact name. */
	for (idx = 0; table[idx].os_name != NULL; idx++) {
		if (strlen(table[idx].os_name) != keylen)
			continue;
		if (strncasecmp(table[idx].os_name, key, keylen))
			continue;

		optset_parse_entry(&table[idx], value, vallen);
		break;
	}
  }
}

30
drivers/filter/optset.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef _OPTSET_H
#define _OPTSET_H
/* Option types, for the 'os_type' field of struct optset. */
enum {
OPT_BOOL, /* presence of the option stores a preset value in an int */
OPT_STRING, /* the option value is copied into a character buffer */
OPT_INT /* the option value is converted with strtol() into an int */
};
/* An entry for the parser of an options set. The 'os_name' field must point
* to a string, which is treated case-insensitively; the last entry of a table
* must have NULL name. The 'os_type' field must be set to one of the OPT_
* values defined above. The 'os_ptr' field must point to the field that is to
* receive the value of a recognized option. For OPT_STRING, it must point to a
* string of a size set in 'os_val'; the resulting string may be truncated, but
* will always be null-terminated. For OPT_BOOL, it must point to an int which
* will be set to the value in 'os_val' if the option is present. For OPT_INT,
* it must point to an int which will be set to the provided option value;
* 'os_val' is then a base passed to strtol().
*/
struct optset {
char *os_name; /* option name; NULL in the last table entry */
int os_type; /* one of the OPT_ values */
void *os_ptr; /* where to store the value */
int os_val; /* buffer size, preset value, or numeric base, by type */
};
_PROTOTYPE( void optset_parse, (struct optset *table, char *string) );
#endif /* _OPTSET_H */

613
drivers/filter/sum.c Normal file
View file

@ -0,0 +1,613 @@
/* Filter driver - middle layer - checksumming */
#include "inc.h"
#include "crc.h"
#include "md5.h"
/* On disk, every group of NR_SUM_SEC data sectors is followed by one
 * checksum sector, so a group takes up NR_SUM_SEC+1 physical sectors.
 */
/* Number of data bytes in one group. */
#define GROUP_SIZE (SECTOR_SIZE * NR_SUM_SEC)
/* Physical sector number of the checksum sector of logical sector 'nr'. */
#define SEC2SUM_NR(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + NR_SUM_SEC)
/* Physical sector number of logical data sector 'nr'. */
#define LOG2PHYS(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + (nr)%NR_SUM_SEC)
/* Conversion between 64-bit byte positions and sector numbers. */
#define POS2SEC(nr) div64u((nr), SECTOR_SIZE)
#define SEC2POS(nr) mul64u((nr), SECTOR_SIZE)
/* Data buffers. The *_array buffers are allocated once, in sum_init(). */
static char *ext_array, *ext_buffer; /* interspersed buffer */
static char *rb0_array; /* write readback buffer for disk 0 */
static char *rb1_array; /* write readback buffer for disk 1 */
/*===========================================================================*
* sum_init *
*===========================================================================*/
void sum_init(void)
{
/* Allocate the three static buffers of SBUF_SIZE bytes used by this layer:
 * the interspersed data buffer and one readback buffer per disk. Panics if
 * any of the allocations fails.
 */
	ext_array = alloc_contig(SBUF_SIZE, 0, NULL);
	rb0_array = alloc_contig(SBUF_SIZE, 0, NULL);
	rb1_array = alloc_contig(SBUF_SIZE, 0, NULL);

	if (!(ext_array != NULL && rb0_array != NULL && rb1_array != NULL))
		panic(__FILE__, "no memory available", NO_NUM);
}
/*===========================================================================*
* calc_sum *
*===========================================================================*/
static void calc_sum(unsigned sector, char *data, char *sum)
{
/* Compute the checksum of the SECTOR_SIZE bytes at 'data' and store it at
 * 'sum', using the algorithm selected by SUM_TYPE. The sector number is
 * mixed into the checksum, so that identical data in different sectors
 * yields different checksums. Panics on an unknown checksum type.
 */
	unsigned long crc, *words, *acc;
	int i, j;
	struct MD5Context ctx;

	acc = (unsigned long *) sum;

	switch(SUM_TYPE) {
	case ST_MD5:
		/* MD5 over the sector data followed by the sector number. */
		MD5Init(&ctx);
		MD5Update(&ctx, (unsigned char *) data, SECTOR_SIZE);
		MD5Update(&ctx, (unsigned char *) &sector, sizeof(sector));
		MD5Final((unsigned char *) sum, &ctx);
		break;

	case ST_CRC:
		/* CRC32 of the sector data, XORed with the sector number. */
		crc = compute_crc((unsigned char *) data, SECTOR_SIZE);
		acc[0] = crc ^ sector;
		break;

	case ST_XOR:
		/* XOR all SUM_SIZE-byte chunks of the sector together, one
		 * word at a time, then XOR the sector number into the first
		 * word.
		 */
		words = (unsigned long *) data;
		memset(sum, 0, SUM_SIZE);

		for(i = 0; i < SECTOR_SIZE / SUM_SIZE; i++)
			for(j = 0; j < SUM_SIZE / sizeof(*words); j++)
				acc[j] ^= *words++;

		acc[0] ^= sector;
		break;

	default:
		panic(__FILE__, "invalid checksum type", SUM_TYPE);
	}
}
/*===========================================================================*
* read_sectors *
*===========================================================================*/
static int read_sectors(char *buf, sector_t phys_sector, int count)
{
/* Read 'count' sectors into 'buf', starting at physical sector
 * 'phys_sector'. If fewer bytes come back than were asked for (EOF), the
 * rest of the buffer is filled with zeroes. Returns OK, or the error from
 * read_write().
 */
	size_t want, got;
	int r;

	want = count * SECTOR_SIZE;
	got = want;

	r = read_write(SEC2POS(phys_sector), buf, buf, &got, FLT_READ);

	if (r == OK && got != want) {
#if DEBUG
		printf("Filter: EOF reading sector %lu\n", phys_sector);
#endif
		memset(buf + got, 0, want - got);
	}

	return r;
}
/*===========================================================================*
* make_group_sum *
*===========================================================================*/
static void make_group_sum(char *bufp, char *sump, sector_t sector, int index,
	int count)
{
/* Generate the checksums of 'count' consecutive sectors of one group.
 * 'bufp' points to the data of the first of these sectors, which has
 * logical sector number 'sector' and is the 'index'th sector of its group.
 * 'sump' points to the start of the group's checksum sector; the checksums
 * are stored in the slots from 'index' onward.
 */
	char *slot;

	slot = sump + index * SUM_SIZE;

	for (; count != 0; count--) {
		calc_sum(sector, bufp, slot);

		sector++;
		bufp += SECTOR_SIZE;
		slot += SUM_SIZE;
	}
}
/*===========================================================================*
* check_group_sum *
*===========================================================================*/
static int check_group_sum(char *bufp, char *sump, sector_t sector, int index,
	int count)
{
/* Verify the checksums of 'count' consecutive sectors of one group; the
 * parameters are those of make_group_sum(). Every mismatch is logged. If
 * BAD_SUM_ERROR is set, the first mismatch is reported to bad_driver() for
 * the main driver and its result is returned right away. Returns OK
 * otherwise.
 */
	char computed[SECTOR_SIZE];
	char *stored;

	stored = sump + index * SUM_SIZE;

	for (; count != 0; count--, bufp += SECTOR_SIZE, stored += SUM_SIZE,
		sector++) {
		calc_sum(sector, bufp, computed);

		if (memcmp(computed, stored, SUM_SIZE) == 0)
			continue;

		printf("Filter: BAD CHECKSUM at sector %lu\n", sector);

		if (BAD_SUM_ERROR)
			return bad_driver(DRIVER_MAIN, BD_DATA, EIO);
	}

	return OK;
}
/*===========================================================================*
* make_sum *
*===========================================================================*/
static int make_sum(sector_t current_sector, sector_t sectors_left)
{
/* Compute checksums over all data in the buffer with expanded data.
 * As side effect, possibly read in first and last checksum sectors
 * and data to fill the gap between the last data sector and the last
 * checksum sector.
 *
 * 'current_sector' is the logical sector number of the first data sector
 * in 'ext_buffer', and 'sectors_left' the number of data sectors in it.
 * Returns OK, or the error from read_sectors().
 */
sector_t sector_in_group, group_left;
size_t size, gap;
char *extp;
int r;
/* See the description of the extended buffer in transfer(). A number
 * of points are relevant for this function in particular:
 *
 * 1) If the "xx" head of the buffer does not cover an entire group,
 * we need to copy in the first checksum sector so that we can
 * modify it.
 * 2) We can generate checksums for the full "yyyyy" groups without
 * copying in the corresponding checksum sectors first, because
 * those sectors will be overwritten entirely anyway.
 * 3) We copy in not only the checksum sector for the group containing
 * the "zzz" tail data, but also all the data between "zzz" and the
 * last checksum sector. This allows us to write all the data in
 * the buffer in one operation. In theory, we could verify the
 * checksum of the data in this gap for extra early failure
 * detection, but we currently do not do this.
 *
 * If points 1 and 3 cover the same group (implying a small, unaligned
 * write operation), the read operation is done only once. Whether
 * point 1 or 3 is skipped depends on whether there is a gap before
 * the checksum sector.
 */
/* Position of the first sector within its group, and the number of sectors
 * from there to the end of that group.
 */
sector_in_group = current_sector % NR_SUM_SEC;
group_left = NR_SUM_SEC - sector_in_group;
extp = ext_buffer;
/* This loop covers points 1 and 2. */
while (sectors_left >= group_left) {
size = group_left * SECTOR_SIZE;
/* The data reaches the end of this group. Fetch the existing checksum
 * sector if the group is only partially covered; start from an all-zero
 * checksum sector otherwise.
 */
if (sector_in_group > 0) {
if ((r = read_sectors(extp + size,
LOG2PHYS(current_sector) + group_left,
1)) != OK)
return r;
}
else memset(extp + size, 0, SECTOR_SIZE);
make_group_sum(extp, extp + size, current_sector,
sector_in_group, group_left);
/* Skip over the data and the checksum sector; all following groups
 * start at their first sector.
 */
extp += size + SECTOR_SIZE;
sectors_left -= group_left;
current_sector += group_left;
sector_in_group = 0;
group_left = NR_SUM_SEC;
}
/* The remaining code covers point 3. */
if (sectors_left > 0) {
size = sectors_left * SECTOR_SIZE;
if (group_left != NR_SUM_SEC - sector_in_group)
panic(__FILE__, "group_left assertion", 0);
/* Number of data sectors between the tail data and the checksum sector.
 * The loop above ended with sectors_left < group_left, so this is at
 * least one.
 */
gap = group_left - sectors_left;
if (gap <= 0)
panic(__FILE__, "gap assertion", 0);
/* Read the gap sectors and the checksum sector in one go. */
if ((r = read_sectors(extp + size,
LOG2PHYS(current_sector) + sectors_left,
gap + 1)) != OK)
return r;
make_group_sum(extp, extp + size + gap * SECTOR_SIZE,
current_sector, sector_in_group, sectors_left);
}
return OK;
}
/*===========================================================================*
* check_sum *
*===========================================================================*/
static int check_sum(sector_t current_sector, size_t bytes_left)
{
/* Verify the checksums of all data in the buffer with expanded data,
 * group by group. 'current_sector' is the logical sector number of the
 * first data sector in the buffer. Returns OK if every group checks out,
 * or RET_REDO as soon as check_group_sum() reports a failure.
 */
	sector_t first_in_group;
	size_t chunk, step, to_group_end;
	int nsectors;
	char *pos;

	pos = ext_buffer;
	first_in_group = current_sector % NR_SUM_SEC;
	to_group_end = (NR_SUM_SEC - first_in_group) * SECTOR_SIZE;

	for (; bytes_left > 0; first_in_group = 0,
		to_group_end = GROUP_SIZE) {
		/* Data bytes to check in this group. */
		chunk = MIN(bytes_left, to_group_end);
		nsectors = chunk / SECTOR_SIZE;

		/* The checksum sector sits right after the group's data. */
		if (check_group_sum(pos, pos + to_group_end, current_sector,
			first_in_group, nsectors) != 0)
			return RET_REDO;

		/* Move past the data and the checksum sector, without
		 * letting the remaining byte count drop below zero.
		 */
		step = chunk + SECTOR_SIZE;
		pos += step;
		bytes_left = (step < bytes_left) ? bytes_left - step : 0;

		current_sector += nsectors;
	}

	return OK;
}
/*===========================================================================*
* check_write *
*===========================================================================*/
static int check_write(u64_t pos, size_t size)
{
/* Read back the data just written, from both disks if mirroring is
 * enabled, and check the result against the original in 'ext_buffer'.
 * Return OK on success, or the error from read_write() if the readback
 * itself fails. On a data mismatch, report the malfunctioning driver to
 * bad_driver() and return its result. The readback buffers are released on
 * every path.
 */
	char *rb0_buffer, *rb1_buffer;
	size_t orig_size;
	int r;

	if (size == 0)
		return OK;

	/* Without mirroring, both pointers refer to the same buffer. */
	rb0_buffer = rb1_buffer =
		flt_malloc(size, rb0_array, SBUF_SIZE);
	if (USE_MIRROR)
		rb1_buffer = flt_malloc(size, rb1_array, SBUF_SIZE);

	/* The read may change 'size'; the buffers must be freed with the
	 * size they were allocated with.
	 */
	orig_size = size;

	r = read_write(pos, rb0_buffer, rb1_buffer, &size, FLT_READ2);

	if (r == OK) {
		/* If we get a size smaller than what we requested, then we
		 * somehow succeeded in writing past the disk end, and now
		 * fail to read it all back. This is not an error, and we
		 * just compare the part that we did manage to read back in.
		 */
		if (memcmp(ext_buffer, rb0_buffer, size)) {
#if DEBUG
			printf("Filter: readback from disk 0 failed (size %d)\n",
				(int) size);
#endif
			r = bad_driver(DRIVER_MAIN, BD_DATA, EFAULT);
		}
		else if (USE_MIRROR && memcmp(ext_buffer, rb1_buffer, size)) {
#if DEBUG
			printf("Filter: readback from disk 1 failed (size %d)\n",
				(int) size);
#endif
			r = bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT);
		}
	}

	if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array);
	flt_free(rb0_buffer, orig_size, rb0_array);

	return r;
}
/*===========================================================================*
* expand *
*===========================================================================*/
static void expand(sector_t first_sector, char *buffer, sector_t sectors_left)
{
/* Spread the contiguous data from 'buffer' over 'ext_buffer', leaving a
 * one-sector hole after the data of each group. 'first_sector' is the
 * logical sector number of the first data sector and 'sectors_left' the
 * number of sectors to copy. The holes themselves are not written to.
 */
	char *from, *to;
	sector_t room;
	size_t nbytes;
	int nr_secs;

	from = buffer;
	to = ext_buffer;

	/* Only the first group may be entered partway; after that, each
	 * iteration starts at a group boundary.
	 */
	for (room = NR_SUM_SEC - first_sector % NR_SUM_SEC;
			sectors_left > 0; room = NR_SUM_SEC) {
		nr_secs = MIN(sectors_left, room);
		nbytes = nr_secs * SECTOR_SIZE;

		memcpy(to, from, nbytes);

		from += nbytes;
		to += nbytes + SECTOR_SIZE;	/* skip the hole */

		sectors_left -= nr_secs;
	}
}
/*===========================================================================*
* collapse *
*===========================================================================*/
static void collapse(sector_t first_sector, char *buffer, size_t *sizep)
{
/* Gather the data parts of the interspersed layout in 'ext_buffer' into
 * one contiguous run in 'buffer', skipping the sector that follows each
 * group. On entry, '*sizep' is the number of bytes to process in the
 * interspersed layout; on return, it is the number of contiguous bytes
 * stored in 'buffer'. 'first_sector' is the logical sector number of the
 * first data sector.
 */
	char *in, *out;
	size_t chunk, remaining, group_room;

	in = ext_buffer;
	out = buffer;

	/* The first group may be entered partway through. */
	group_room = (NR_SUM_SEC - first_sector % NR_SUM_SEC) * SECTOR_SIZE;

	for (remaining = *sizep; remaining > 0; group_room = GROUP_SIZE) {
		chunk = MIN(remaining, group_room);

		memcpy(out, in, chunk);

		out += chunk;
		in += chunk + SECTOR_SIZE;	/* step over one extra sector */

		/* Never subtract more than what is left. */
		remaining -= MIN(chunk + SECTOR_SIZE, remaining);
	}

	*sizep = out - buffer;
}
/*===========================================================================*
* expand_sizes *
*===========================================================================*/
static size_t expand_sizes(sector_t first_sector, sector_t nr_sectors,
	size_t *req_size)
{
/* Given the first logical sector and the number of sectors of a request,
 * compute two byte sizes in the interspersed layout. The size from the first
 * data sector up to and including the last data sector is stored in
 * '*req_size'. The size from the first data sector up to and including the
 * sector that SEC2SUM_NR() yields for the last data sector is the return
 * value.
 */
	sector_t final_logical, phys_first, phys_last, phys_sum;

	final_logical = first_sector + nr_sectors - 1;

	phys_first = LOG2PHYS(first_sector);
	phys_last = LOG2PHYS(final_logical);
	phys_sum = SEC2SUM_NR(final_logical);

	*req_size = (phys_last - phys_first + 1) * SECTOR_SIZE;

	return (phys_sum - phys_first + 1) * SECTOR_SIZE;
}
/*===========================================================================*
* collapse_size *
*===========================================================================*/
static void collapse_size(sector_t first_sector, size_t *sizep)
{
/* Convert '*sizep' from a byte count in the interspersed layout (data plus
 * checksum sectors) to the corresponding byte count of contiguous user
 * data. 'first_sector' is the logical sector number at which the transfer
 * started.
 */
	sector_t skew;
	size_t span, sum_secs, data_secs;

	/* Pretend the transfer started at the beginning of its group, so that
	 * every (NR_SUM_SEC+1) sectors contain exactly one checksum sector.
	 */
	skew = first_sector % NR_SUM_SEC;
	span = *sizep / SECTOR_SIZE + skew;

	sum_secs = span / (NR_SUM_SEC+1);

	/* Take out the pretended head again, along with the checksums. */
	data_secs = span - skew - sum_secs;

	*sizep = data_secs * SECTOR_SIZE;
}
/*===========================================================================*
* transfer *
*===========================================================================*/
int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw)
{
/* Transfer data in interspersed-checksum format. When writing, first
 * compute checksums, and read back the written data afterwards. When
 * reading, check the stored checksums afterwards.
 *
 * 'pos' is the logical byte position, 'buffer' the caller's contiguous
 * data buffer, and 'flag_rw' the operation: FLT_WRITE for writing, anything
 * else is handled as a read. On entry, '*sizep' is the requested size in
 * bytes; on successful return, it is the contiguous size transferred.
 *
 * Return OK on success, RET_REDO when generating or verifying checksums
 * fails, or the error code from read_write(). The extended buffer is
 * released on every return path.
 */
	sector_t first_sector, nr_sectors;
	size_t ext_size, req_size, res_size;
	u64_t phys_pos;
	int r;

	/* If we don't use checksums or even checksum layout, simply pass on
	 * the request to the drivers as is.
	 */
	if (!USE_SUM_LAYOUT)
		return read_write(pos, buffer, buffer, sizep, flag_rw);

	/* The extended buffer (for checksumming) essentially looks like this:
	 *
	 *   ------------------------------
	 *   |xx|C|yyyyy|C|yyyyy|C|zzz  |C|
	 *   ------------------------------
	 *
	 * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is
	 * split up into groups, so that each group is followed by a checksum
	 * sector C containing the checksums for all data sectors in that
	 * group. The head and tail of the actual data may cover parts of
	 * groups; the remaining data (nor their checksums) are not to be
	 * modified.
	 *
	 * The entire buffer is written or read in one operation: the
	 * read_write() call below. In order to write, we may first have to
	 * read some data; see the description in make_sum().
	 *
	 * Some points of interest here:
	 * - We need a buffer large enough to hold all the user and non-user
	 *   data, from the first "xx" to the last checksum sector. This size
	 *   is ext_size.
	 * - For writing, we need to expand the user-provided data from
	 *   contiguous layout to interspersed format. The size of the user
	 *   data after expansion is req_size.
	 * - For reading, we need to collapse the user-requested data from
	 *   interspersed to contiguous format. For writing, we still need to
	 *   compute the contiguous result size to return to the user.
	 * - In both cases, the result size may be different from the
	 *   requested write size, because an EOF (as in, disk end) may occur
	 *   and the resulting size is less than the requested size.
	 * - If we only follow the checksum layout, and do not do any
	 *   checksumming, ext_size is reduced to req_size.
	 */

	first_sector = POS2SEC(pos);
	nr_sectors = *sizep / SECTOR_SIZE;
	phys_pos = SEC2POS(LOG2PHYS(first_sector));

#if DEBUG2
	printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n",
		ex64hi(pos), ex64lo(pos), ex64hi(phys_pos), ex64lo(phys_pos));
#endif

	/* Compute the size for the buffer and for the user data after
	 * expansion.
	 */
	ext_size = expand_sizes(first_sector, nr_sectors, &req_size);

	if (!USE_CHECKSUM)
		ext_size = req_size;

	ext_buffer = flt_malloc(ext_size, ext_array, SBUF_SIZE);

	if (flag_rw == FLT_WRITE) {
		expand(first_sector, buffer, nr_sectors);

		if (USE_CHECKSUM && make_sum(first_sector, nr_sectors)) {
			/* Do not leak the extended buffer on failure. */
			flt_free(ext_buffer, ext_size, ext_array);

			return RET_REDO;
		}
	}

	/* Perform the actual I/O. */
	res_size = ext_size;
	r = read_write(phys_pos, ext_buffer, ext_buffer, &res_size, flag_rw);

#if DEBUG2
	printf("Filter: transfer: read_write(%x:%x, %u, %d) = %d, %u\n",
		ex64hi(phys_pos), ex64lo(phys_pos), ext_size, flag_rw, r,
		res_size);
#endif

	if (r != OK) {
		flt_free(ext_buffer, ext_size, ext_array);

		return r;
	}

	/* Limit the resulting size to the user data part of the buffer.
	 * The resulting size may already be less, due to an EOF.
	 */
	*sizep = MIN(req_size, res_size);

	/* From here on, 'r' is OK unless one of the checks below fails. In
	 * that case the size is left as is and the conversion to contiguous
	 * form is skipped, but the buffer is still released further down.
	 */
	if (flag_rw == FLT_WRITE) {
		if (USE_CHECKSUM && check_write(phys_pos, res_size))
			r = RET_REDO;
		else
			collapse_size(first_sector, sizep);
	}
	else { /* FLT_READ */
		if (USE_CHECKSUM && check_sum(first_sector, *sizep))
			r = RET_REDO;
		else
			collapse(first_sector, buffer, sizep);
	}

	flt_free(ext_buffer, ext_size, ext_array);

	return r;
}
/*===========================================================================*
* convert *
*===========================================================================*/
u64_t convert(u64_t size)
{
/* Translate a raw disk size into the size that is visible to the user.
 * Without checksum layout, the size is returned unchanged. Otherwise, only
 * whole units of (NR_SUM_SEC + 1) sectors are counted, and each such unit
 * contributes NR_SUM_SEC sectors to the result.
 */
	if (USE_SUM_LAYOUT) {
		sector_t raw_sectors;

		raw_sectors = POS2SEC(size);

		return SEC2POS(raw_sectors / (NR_SUM_SEC + 1) * NR_SUM_SEC);
	}

	return size;
}

109
drivers/filter/util.c Normal file
View file

@ -0,0 +1,109 @@
/* Filter driver - utility functions */
#include "inc.h"
#include <sys/mman.h>
#include <signal.h>
/* Uptime value (as reported by getuptime()) at which the alarm set through
 * flt_alarm() is due, or 0 when no alarm is outstanding.
 */
static clock_t next_alarm;
/*===========================================================================*
* flt_malloc *
*===========================================================================*/
char *flt_malloc(size_t size, char *sbuf, size_t ssize)
{
/* Obtain a buffer of 'size' bytes. A request that fits within 'ssize'
 * bytes is served from the caller-supplied static buffer 'sbuf'; a larger
 * request gets memory from alloc_contig() instead. Failure to obtain
 * memory results in a panic. Release the buffer with flt_free().
 */
	char *mem;

	if (size > ssize) {
		mem = alloc_contig(size, 0, NULL);

		if (mem == NULL)
			panic(__FILE__, "out of memory", size);

		return mem;
	}

	return sbuf;
}
/*===========================================================================*
* flt_free *
*===========================================================================*/
void flt_free(char *buf, size_t size, char *sbuf)
{
/* Release a buffer obtained from flt_malloc(). 'size' and 'sbuf' are the
 * size and static buffer that were passed to flt_malloc(). If 'buf' is the
 * static buffer itself, there is nothing to release.
 */
	if (buf == sbuf)
		return;

	munmap(buf, size);
}
/*===========================================================================*
* print64 *
*===========================================================================*/
char *print64(u64_t p)
{
/* Format a 64-bit value as a hexadecimal string and return a pointer to
 * it. The string lives in one of NB static buffers that are used in
 * rotation, so a returned pointer stays valid only until NB further calls
 * have been made.
 */
#define NB 10
	static int slot_nr = 0;
	static char slots[NB][100];
	u32_t lo = ex64lo(p), hi = ex64hi(p);
	char *out;

	/* Move on to the next buffer in the ring. */
	slot_nr = (slot_nr+1) % NB;
	out = slots[slot_nr];

	/* Leave out the high half altogether when it is zero. */
	if (hi != 0)
		sprintf(out, "%lx%08lx", hi, lo);
	else
		sprintf(out, "%lx", lo);

	return out;
}
/*===========================================================================*
* flt_alarm *
*===========================================================================*/
clock_t flt_alarm(clock_t dt)
{
/* Set, clear, or query the driver's alarm. A negative 'dt' only returns
 * the time at which the current alarm is due (0 if none). A 'dt' of zero
 * clears the alarm, which must be set. A positive 'dt' sets an alarm 'dt'
 * from now, and no alarm may be set already. Violations and system call
 * failures result in a panic. Return the new value of the due time.
 */
	int status;

	/* Query only. */
	if (dt < 0)
		return next_alarm;

	if ((status = sys_setalarm(dt, 0)) != OK)
		panic(__FILE__, "sys_setalarm failed", status);

	if (dt != 0) {
		/* Setting a new alarm. */
		if (next_alarm != 0)
			panic(__FILE__, "overwriting alarm", status);

		status = getuptime(&next_alarm);
		if (status != OK)
			panic(__FILE__, "getuptime failed", status);

		next_alarm += dt;

		return next_alarm;
	}

	/* Clearing the current alarm. */
	if (next_alarm == 0)
		panic(__FILE__, "clearing unset alarm", status);

	next_alarm = 0;

	return next_alarm;
}
/*===========================================================================*
* got_alarm *
*===========================================================================*/
static void got_alarm(int sig)
{
/* SIGALRM handler. It has nothing to do: it is installed only so that the
 * signal is caught, which makes the pause() call in flt_sleep() return.
 */
}

/*===========================================================================*
 *				flt_sleep				     *
 *===========================================================================*/
void flt_sleep(int secs)
{
/* Sleep for the given number of seconds. Don't use sleep(), as that
 * will end up calling select() to VFS. This implementation could be
 * improved.
 *
 * A zero or negative 'secs' returns immediately. Without this check,
 * alarm(0) would merely cancel any pending alarm and schedule nothing, so
 * that pause() would block until some unrelated signal arrived; a negative
 * value would be converted to a huge unsigned timeout.
 */
	if (secs <= 0)
		return;

	signal(SIGALRM, got_alarm);
	alarm(secs);
	pause();
}

View file

@ -464,3 +464,25 @@ driver osscore
;
uid 0;
};
# Privileges of the filter driver: the kernel calls it may make ('system'),
# the endpoints it may exchange messages with ('ipc'), and the drivers listed
# under 'control'. NOTE(review): 'control' presumably names the drivers that
# the filter is allowed to control (e.g. restart) - confirm against RS.
driver filter
{
system
SETALARM # 24
TIMES # 25
GETINFO # 26
SAFECOPYFROM # 31
SAFECOPYTO # 32
SETGRANT # 34
SYSCTL # 44
;
ipc
SYSTEM PM VFS RS DS VM
at_wini
bios_wini
;
control
at_wini
bios_wini
;
};

View file

@ -38,6 +38,8 @@ enum dev_style { STYLE_DEV, STYLE_NDEV, STYLE_TTY, STYLE_CLONE };
#define RESCUE_MAJOR 9 /* major device for rescue */
#define FILTER_MAJOR 11 /* major device for filter driver */
#define LOG_MAJOR 15 /* major device for log driver */
# define IS_KLOG_DEV 0 /* minor device for /dev/klog */

View file

@ -46,7 +46,7 @@ PRIVATE struct dmap init_dmap[] = {
DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /* 8 = /dev/c1 */
DT(0, 0, 0, 0, DMAP_MUTABLE, "") /* 9 = not used */
DT(0, no_dev, 0, 0, DMAP_MUTABLE, "") /*10 = /dev/c2 */
DT(0, 0, 0, 0, DMAP_MUTABLE, "") /*11 = not used */
DT(0, no_dev, 0, 0, DMAP_MUTABLE, "") /*11 = /dev/filter*/
DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /*12 = /dev/c3 */
DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /*13 = /dev/audio */
DT(0, 0, 0, 0, DMAP_MUTABLE, "") /*14 = not used */