minix/drivers/filter/sum.c
David van Moolenbroek b4d909d415 Split block/character protocols and libdriver
This patch separates the character and block driver communication
protocols. The old character protocol remains the same, but a new
block protocol is introduced. The libdriver library is replaced by
two new libraries: libchardriver and libblockdriver. Their exposed
API, and drivers that use them, have been updated accordingly.
Together, libbdev and libblockdriver now completely abstract away
the message format used by the block protocol. As the memory driver
is both a character and a block device driver, it now implements its
own message loop.

The most important semantic change made to the block protocol is that
it is no longer possible to return both partial results and an error
for a single transfer. This simplifies the interaction between the
caller and the driver, as the I/O vector no longer needs to be copied
back. Also, drivers are now no longer supposed to decide based on the
layout of the I/O vector when a transfer should be cut short. Put
simply, transfers are now supposed to either succeed completely, or
result in an error.

After this patch, the state of the various pieces is as follows:
- block protocol: stable
- libbdev API: stable for synchronous communication
- libblockdriver API: needs slight revision (the drvlib/partition API
  in particular; the threading API will also change shortly)
- character protocol: needs cleanup
- libchardriver API: needs cleanup accordingly
- driver restarts: largely unsupported until endpoint changes are
  reintroduced

As a side effect, this patch eliminates several bugs, hacks, and gcc
-Wall and -W warnings all over the place. It probably introduces a
few new ones, too.

Update warning: this patch changes the protocol between MFS and disk
drivers, so in order to use old/new images, the MFS from the ramdisk
must be used to mount all file systems.
2011-11-23 14:06:37 +01:00

622 lines
18 KiB
C

/* Filter driver - middle layer - checksumming */
#include "inc.h"
#include "crc.h"
#include "md5.h"
/* Number of data bytes in one checksum group (NR_SUM_SEC data sectors). */
#define GROUP_SIZE (SECTOR_SIZE * NR_SUM_SEC)
/* Physical sector number of the checksum sector that belongs to the group
 * containing logical sector 'nr'. Each group takes up NR_SUM_SEC+1 physical
 * sectors, and the checksum sector is the last one of them.
 */
#define SEC2SUM_NR(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + NR_SUM_SEC)
/* Physical sector number at which logical data sector 'nr' is stored, that
 * is, the logical number plus one skipped checksum sector per preceding
 * group.
 */
#define LOG2PHYS(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + (nr)%NR_SUM_SEC)
/* Convert between a 64-bit byte position and a sector number, using the
 * div64u()/mul64u() helpers with SECTOR_SIZE as divisor/multiplier.
 */
#define POS2SEC(nr) div64u((nr), SECTOR_SIZE)
#define SEC2POS(nr) mul64u((nr), SECTOR_SIZE)
/* Data buffers. The *_array pointers are set once in sum_init() to buffers of
 * SBUF_SIZE bytes; they are handed to flt_malloc()/flt_free() together with
 * the size actually needed for each request. The ext_buffer pointer is the
 * buffer in use for the current transfer() call.
 */
static char *ext_array, *ext_buffer; /* interspersed buffer */
static char *rb0_array; /* write readback buffer for disk 0 */
static char *rb1_array; /* write readback buffer for disk 1 */
/*===========================================================================*
* sum_init *
*===========================================================================*/
void sum_init(void)
{
	/* Obtain the three SBUF_SIZE-sized buffers used by this layer: one
	 * for interspersed data and one readback buffer per disk. All three
	 * requests are made first; a failure of any of them is fatal.
	 */
	ext_array = flt_malloc(SBUF_SIZE, NULL, 0);
	rb0_array = flt_malloc(SBUF_SIZE, NULL, 0);
	rb1_array = flt_malloc(SBUF_SIZE, NULL, 0);

	if (!ext_array || !rb0_array || !rb1_array)
		panic("no memory available");
}
/*===========================================================================*
* calc_sum *
*===========================================================================*/
static void calc_sum(unsigned sector, char *data, char *sum)
{
/* Compute the checksum for a sector. The sector number must be part
* of the checksum in some way.
*/
unsigned long crc, *p, *q;
int i, j;
struct MD5Context ctx;
switch(SUM_TYPE) {
case ST_NIL:
/* No checksum at all */
q = (unsigned long *) sum;
*q = sector;
break;
case ST_XOR:
/* Basic XOR checksum */
p = (unsigned long *) data;
memset(sum, 0, SUM_SIZE);
for(i = 0; i < SECTOR_SIZE / SUM_SIZE; i++) {
q = (unsigned long *) sum;
for(j = 0; (size_t) j < SUM_SIZE / sizeof(*p); j++) {
*q ^= *p;
q++;
p++;
}
}
q = (unsigned long *) sum;
*q ^= sector;
break;
case ST_CRC:
/* CRC32 checksum */
crc = compute_crc((unsigned char *) data, SECTOR_SIZE);
q = (unsigned long *) sum;
*q = crc ^ sector;
break;
case ST_MD5:
/* MD5 checksum */
MD5Init(&ctx);
MD5Update(&ctx, (unsigned char *) data, SECTOR_SIZE);
MD5Update(&ctx, (unsigned char *) &sector, sizeof(sector));
MD5Final((unsigned char *) sum, &ctx);
break;
default:
panic("invalid checksum type: %d", SUM_TYPE);
}
}
/*===========================================================================*
* read_sectors *
*===========================================================================*/
static int read_sectors(char *buf, sector_t phys_sector, int count)
{
	/* Fill 'buf' with 'count' sectors read from physical sector
	 * 'phys_sector' onwards. A short read (disk end) is not an error:
	 * the part of the buffer that was not read is cleared instead.
	 * Return OK, or the error code from read_write().
	 */
	size_t wanted, got;
	int r;

	wanted = count * SECTOR_SIZE;
	got = wanted;

	r = read_write(SEC2POS(phys_sector), buf, buf, &got, FLT_READ);
	if (r != OK)
		return r;

	if (got == wanted)
		return OK;

#if DEBUG
	printf("Filter: EOF reading sector %lu\n", phys_sector);
#endif

	/* Zero the tail that lies beyond the end of the disk. */
	memset(buf + got, 0, wanted - got);

	return OK;
}
/*===========================================================================*
* make_group_sum *
*===========================================================================*/
static void make_group_sum(char *bufp, char *sump, sector_t sector, int index,
	int count)
{
	/* Generate checksums for 'count' consecutive data sectors that all
	 * lie in one group. 'bufp' points to the first data sector to
	 * process, 'sector' is its logical sector number and 'index' is its
	 * position within the group. 'sump' points to the start of the
	 * group's checksum sector; each checksum is stored in the slot
	 * matching its data sector's position.
	 */
	char *data = bufp;
	char *slot = sump + index * SUM_SIZE;

	for (; count != 0; count--) {
		calc_sum(sector, data, slot);

		sector++;
		data += SECTOR_SIZE;
		slot += SUM_SIZE;
	}
}
/*===========================================================================*
* check_group_sum *
*===========================================================================*/
static int check_group_sum(char *bufp, const char *sump, sector_t sector,
	int index, int count)
{
	/* Verify the stored checksums of 'count' data sectors within one
	 * group. The parameters have the same meaning as for
	 * make_group_sum(). Every mismatch is reported on the console. If
	 * BAD_SUM_ERROR is set, the first mismatch ends the check and the
	 * result of bad_driver() is returned; otherwise checking continues.
	 * Return OK if no mismatch caused an early return.
	 */
	char computed[SECTOR_SIZE];
	const char *stored = sump + index * SUM_SIZE;

	for (; count != 0;
	    count--, sector++, bufp += SECTOR_SIZE, stored += SUM_SIZE) {
		calc_sum(sector, bufp, computed);

		if (memcmp(computed, stored, SUM_SIZE) == 0)
			continue;

		printf("Filter: BAD CHECKSUM at sector %lu\n", sector);

		if (BAD_SUM_ERROR)
			return bad_driver(DRIVER_MAIN, BD_DATA, EIO);
	}

	return OK;
}
/*===========================================================================*
* make_sum *
*===========================================================================*/
static int make_sum(sector_t current_sector, sector_t sectors_left)
{
	/* Fill in the checksum sectors of the expanded data in 'ext_buffer',
	 * for 'sectors_left' data sectors starting at logical sector
	 * 'current_sector'. Where a group is only partly covered by the new
	 * data, the existing checksum sector (and, for the last group, the
	 * data between the new data and that checksum sector) is read from
	 * disk first. Return OK, or the error code from read_sectors().
	 *
	 * With the buffer layout described in transfer() in mind:
	 *
	 * 1) If the "xx" head does not span a whole group, its checksum
	 *    sector is read in first, as only part of it will be replaced.
	 * 2) Full "yyyyy" groups need no read: their checksum sector is
	 *    regenerated completely, so it merely starts out zeroed.
	 * 3) For the "zzz" tail, both the gap between the tail data and the
	 *    checksum sector, and the checksum sector itself, are read in.
	 *    That way the entire buffer can be written in one operation.
	 *    The gap data is not verified against its checksums here.
	 *
	 * If 1 and 3 concern the same group (a small, unaligned write), only
	 * one of the two reads takes place, depending on whether there is a
	 * gap in front of the checksum sector.
	 */
	sector_t group_pos, group_room;
	size_t nbytes, gap;
	char *cursor, *sum_sector;
	int r;

	group_pos = current_sector % NR_SUM_SEC;
	group_room = NR_SUM_SEC - group_pos;
	cursor = ext_buffer;

	/* Points 1 and 2: all groups that the data runs to the end of. */
	while (sectors_left >= group_room) {
		nbytes = group_room * SECTOR_SIZE;
		sum_sector = cursor + nbytes;

		if (group_pos > 0) {
			/* Partial head: keep the other sectors' checksums. */
			r = read_sectors(sum_sector,
			    LOG2PHYS(current_sector) + group_room, 1);
			if (r != OK)
				return r;
		} else {
			memset(sum_sector, 0, SECTOR_SIZE);
		}

		make_group_sum(cursor, sum_sector, current_sector, group_pos,
		    group_room);

		/* Step over the data and checksum sector of this group. */
		cursor = sum_sector + SECTOR_SIZE;
		sectors_left -= group_room;
		current_sector += group_room;
		group_pos = 0;
		group_room = NR_SUM_SEC;
	}

	/* Point 3: a tail that ends before the end of its group. */
	if (sectors_left <= 0)
		return OK;

	nbytes = sectors_left * SECTOR_SIZE;

	if (group_room != NR_SUM_SEC - group_pos)
		panic("group_left assertion: %d", 0);

	gap = group_room - sectors_left;

	if (gap <= 0)
		panic("gap assertion: %d", 0);

	/* Read the gap sectors plus the checksum sector behind them. */
	r = read_sectors(cursor + nbytes,
	    LOG2PHYS(current_sector) + sectors_left, gap + 1);
	if (r != OK)
		return r;

	make_group_sum(cursor, cursor + nbytes + gap * SECTOR_SIZE,
	    current_sector, group_pos, sectors_left);

	return OK;
}
/*===========================================================================*
* check_sum *
*===========================================================================*/
static int check_sum(sector_t current_sector, size_t bytes_left)
{
	/* Verify the checksums of the interspersed data in 'ext_buffer'.
	 * 'current_sector' is the logical number of the first data sector
	 * and 'bytes_left' the number of buffer bytes to cover. Return OK
	 * if check_group_sum() succeeds for every group, or RET_REDO as
	 * soon as it reports a failure.
	 */
	char *cursor = ext_buffer;
	sector_t group_pos = current_sector % NR_SUM_SEC;
	size_t group_room = (NR_SUM_SEC - group_pos) * SECTOR_SIZE;
	size_t chunk;
	int nsec;

	while (bytes_left > 0) {
		/* The data of this group that is present in the buffer. */
		chunk = MIN(bytes_left, group_room);
		nsec = chunk / SECTOR_SIZE;

		/* The checksum sector follows the group's last data sector. */
		if (check_group_sum(cursor, cursor + group_room,
		    current_sector, group_pos, nsec))
			return RET_REDO;

		/* Skip the data as well as the checksum sector behind it. */
		cursor += chunk + SECTOR_SIZE;
		bytes_left -= MIN(chunk + SECTOR_SIZE, bytes_left);
		current_sector += nsec;

		/* All following groups are processed from their start. */
		group_pos = 0;
		group_room = GROUP_SIZE;
	}

	return OK;
}
/*===========================================================================*
* check_write *
*===========================================================================*/
static int check_write(u64_t pos, size_t size)
{
	/* Read back the 'size' bytes just written at position 'pos', from
	 * both disks if mirroring is enabled, and compare the result against
	 * the original data in 'ext_buffer'. Return OK on success or if
	 * 'size' is zero. Return the error code from read_write() if the
	 * readback itself fails. On a data mismatch, report the offending
	 * driver and return the result of bad_driver().
	 *
	 * The readback buffers are released on every path out of this
	 * function, including the mismatch paths.
	 */
	char *rb0_buffer, *rb1_buffer;
	size_t orig_size;
	int r;

	if (size == 0)
		return OK;

	/* Without mirroring, both pointers refer to the same buffer. */
	rb0_buffer = rb1_buffer =
		flt_malloc(size, rb0_array, SBUF_SIZE);
	if (USE_MIRROR)
		rb1_buffer = flt_malloc(size, rb1_array, SBUF_SIZE);

	/* read_write() may lower 'size'; the buffers must be released with
	 * the size they were requested with.
	 */
	orig_size = size;

	r = read_write(pos, rb0_buffer, rb1_buffer, &size, FLT_READ2);

	if (r == OK) {
		/* If we get a size smaller than what we requested, then we
		 * somehow succeeded in writing past the disk end, and now
		 * fail to read it all back. This is not an error, and we
		 * just compare the part that we did manage to read back in.
		 */
		if (memcmp(ext_buffer, rb0_buffer, size)) {
#if DEBUG
			printf("Filter: readback from disk 0 failed (size %d)\n",
				size);
#endif
			r = bad_driver(DRIVER_MAIN, BD_DATA, EFAULT);
		}
		else if (USE_MIRROR && memcmp(ext_buffer, rb1_buffer, size)) {
#if DEBUG
			printf("Filter: readback from disk 1 failed (size %d)\n",
				size);
#endif
			r = bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT);
		}
	}

	/* Single exit: release the readback buffers whatever the outcome. */
	if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array);
	flt_free(rb0_buffer, orig_size, rb0_array);

	return r;
}
/*===========================================================================*
* expand *
*===========================================================================*/
static void expand(sector_t first_sector, char *buffer, sector_t sectors_left)
{
	/* Spread the 'sectors_left' contiguous data sectors in 'buffer' out
	 * over 'ext_buffer', leaving a one-sector hole after each group for
	 * its checksum sector. 'first_sector' is the logical number of the
	 * first data sector. The holes themselves are not written to.
	 */
	const char *in = buffer;
	char *out = ext_buffer;
	sector_t room = NR_SUM_SEC - first_sector % NR_SUM_SEC;
	size_t nbytes;
	int nsec;

	/* Only the first group may be entered halfway; after that, every
	 * group has room for NR_SUM_SEC sectors.
	 */
	for (; sectors_left > 0; room = NR_SUM_SEC) {
		nsec = MIN(sectors_left, room);
		nbytes = nsec * SECTOR_SIZE;

		memcpy(out, in, nbytes);

		in += nbytes;
		out += nbytes + SECTOR_SIZE;	/* skip the checksum sector */
		sectors_left -= nsec;
	}
}
/*===========================================================================*
* collapse *
*===========================================================================*/
static void collapse(sector_t first_sector, char *buffer, size_t *sizep)
{
	/* Copy the data sectors from the interspersed 'ext_buffer' to
	 * 'buffer' in contiguous form, dropping the checksum sectors in
	 * between. On entry, '*sizep' is the number of bytes to process in
	 * 'ext_buffer'; on return, it is the number of bytes stored in
	 * 'buffer'. 'first_sector' is the logical number of the first data
	 * sector.
	 */
	char *in = ext_buffer, *out = buffer;
	size_t remaining = *sizep;
	size_t room = (NR_SUM_SEC - first_sector % NR_SUM_SEC) * SECTOR_SIZE;
	size_t chunk;

	while (remaining > 0) {
		chunk = MIN(remaining, room);

		memcpy(out, in, chunk);

		out += chunk;
		in += chunk + SECTOR_SIZE;	/* skip the checksum sector */

		/* The skipped checksum sector counts against the input
		 * size too, but may not be part of it at the very end.
		 */
		remaining -= MIN(chunk + SECTOR_SIZE, remaining);

		room = GROUP_SIZE;
	}

	*sizep = out - buffer;
}
/*===========================================================================*
* expand_sizes *
*===========================================================================*/
static size_t expand_sizes(sector_t first_sector, sector_t nr_sectors,
	size_t *req_size)
{
	/* Determine how many bytes a run of 'nr_sectors' logical sectors
	 * starting at 'first_sector' takes up in interspersed format. Two
	 * sizes are produced: '*req_size' spans from the first up to and
	 * including the last data sector, with the checksum sectors in
	 * between; the return value additionally runs up to and including
	 * the checksum sector of the group holding the last data sector.
	 */
	sector_t last_logical = first_sector + nr_sectors - 1;
	sector_t phys_first = LOG2PHYS(first_sector);
	sector_t phys_last = LOG2PHYS(last_logical);
	sector_t phys_sum = SEC2SUM_NR(last_logical);

	*req_size = (phys_last - phys_first + 1) * SECTOR_SIZE;

	return (phys_sum - phys_first + 1) * SECTOR_SIZE;
}
/*===========================================================================*
* collapse_size *
*===========================================================================*/
static void collapse_size(sector_t first_sector, size_t *sizep)
{
	/* Convert '*sizep' from a byte count in interspersed format, as
	 * resulting from a write that started at logical sector
	 * 'first_sector', to the corresponding amount of contiguous user
	 * data in bytes.
	 */
	sector_t lead = first_sector % NR_SUM_SEC;
	size_t span, sum_secs, data_secs;

	/* Pretend the transfer began at the start of its group, so that
	 * the checksum sectors it crossed can be found by plain division.
	 */
	span = *sizep / SECTOR_SIZE + lead;
	sum_secs = span / (NR_SUM_SEC+1);

	/* Take the pretended lead-in and the checksum sectors off again. */
	data_secs = span - lead - sum_secs;

	*sizep = data_secs * SECTOR_SIZE;
}
/*===========================================================================*
* transfer *
*===========================================================================*/
int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw)
{
	/* Transfer data in interspersed-checksum format. When writing, first
	 * compute checksums, and read back the written data afterwards. When
	 * reading, check the stored checksums afterwards.
	 *
	 * 'pos' is the logical byte position, 'buffer' the contiguous user
	 * data and 'flag_rw' either FLT_READ or FLT_WRITE. On entry, '*sizep'
	 * is the requested size in bytes; on successful return, it is the
	 * amount of contiguous user data actually transferred. Return OK on
	 * success, the error code from read_write() if the I/O fails, or
	 * RET_REDO if checksum generation, write readback or checksum
	 * verification fails.
	 *
	 * The extended buffer is released on every path out of this function.
	 */
	sector_t first_sector, nr_sectors;
	size_t ext_size, req_size, res_size;
	u64_t phys_pos;
	int r;

	/* If we don't use checksums or even checksum layout, simply pass on
	 * the request to the drivers as is.
	 */
	if (!USE_SUM_LAYOUT)
		return read_write(pos, buffer, buffer, sizep, flag_rw);

	/* The extended buffer (for checksumming) essentially looks like this:
	 *
	 * ------------------------------
	 * |xx|C|yyyyy|C|yyyyy|C|zzz |C|
	 * ------------------------------
	 *
	 * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is
	 * split up into groups, so that each group is followed by a checksum
	 * sector C containing the checksums for all data sectors in that
	 * group. The head and tail of the actual data may cover parts of
	 * groups; the remaining data (nor their checksums) are not to be
	 * modified.
	 *
	 * The entire buffer is written or read in one operation: the
	 * read_write() call below. In order to write, we may first have to
	 * read some data; see the description in make_sum().
	 *
	 * Some points of interest here:
	 * - We need a buffer large enough to hold all the user and non-user
	 *   data, from the first "xx" to the last checksum sector. This size
	 *   is ext_size.
	 * - For writing, we need to expand the user-provided data from
	 *   contiguous layout to interspersed format. The size of the user
	 *   data after expansion is req_size.
	 * - For reading, we need to collapse the user-requested data from
	 *   interspersed to contiguous format. For writing, we still need to
	 *   compute the contiguous result size to return to the user.
	 * - In both cases, the result size may be different from the
	 *   requested write size, because an EOF (as in, disk end) may occur
	 *   and the resulting size is less than the requested size.
	 * - If we only follow the checksum layout, and do not do any
	 *   checksumming, ext_size is reduced to req_size.
	 */
	first_sector = POS2SEC(pos);
	nr_sectors = *sizep / SECTOR_SIZE;
	phys_pos = SEC2POS(LOG2PHYS(first_sector));

	/* NOTE(review): a request smaller than one sector yields nr_sectors
	 * equal to zero, in which case expand_sizes() below computes with
	 * the sector before first_sector. Presumably callers never issue
	 * such requests - confirm.
	 */

#if DEBUG2
	printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n",
		ex64hi(pos), ex64lo(pos), ex64hi(phys_pos), ex64lo(phys_pos));
#endif

	/* Compute the size for the buffer and for the user data after
	 * expansion.
	 */
	ext_size = expand_sizes(first_sector, nr_sectors, &req_size);
	if (!USE_CHECKSUM)
		ext_size = req_size;

	ext_buffer = flt_malloc(ext_size, ext_array, SBUF_SIZE);

	/* From here on, leave through 'out' only, so that the extended
	 * buffer is released no matter how the transfer ends.
	 */
	if (flag_rw == FLT_WRITE) {
		expand(first_sector, buffer, nr_sectors);

		if (USE_CHECKSUM && make_sum(first_sector, nr_sectors)) {
			r = RET_REDO;
			goto out;
		}
	}

	/* Perform the actual I/O. */
	res_size = ext_size;
	r = read_write(phys_pos, ext_buffer, ext_buffer, &res_size, flag_rw);

#if DEBUG2
	printf("Filter: transfer: read_write(%x:%x, %u, %d) = %d, %u\n",
		ex64hi(phys_pos), ex64lo(phys_pos), ext_size, flag_rw, r,
		res_size);
#endif

	if (r != OK)
		goto out;

	/* Limit the resulting size to the user data part of the buffer.
	 * The resulting size may already be less, due to an EOF.
	 */
	*sizep = MIN(req_size, res_size);

	if (flag_rw == FLT_WRITE) {
		/* The readback compares against ext_buffer, so the buffer
		 * must remain allocated until check_write() is done.
		 */
		if (USE_CHECKSUM && check_write(phys_pos, res_size)) {
			r = RET_REDO;
			goto out;
		}

		collapse_size(first_sector, sizep);
	}
	else { /* FLT_READ */
		if (USE_CHECKSUM && check_sum(first_sector, *sizep)) {
			r = RET_REDO;
			goto out;
		}

		collapse(first_sector, buffer, sizep);
	}

out:
	flt_free(ext_buffer, ext_size, ext_array);

	return r;
}
/*===========================================================================*
* convert *
*===========================================================================*/
u64_t convert(u64_t size)
{
	/* Translate a raw disk size in bytes to the size that is visible to
	 * the user, by leaving out the space taken up by checksum sectors.
	 * Only whole groups (NR_SUM_SEC data sectors plus their checksum
	 * sector) are counted. Without checksum layout, the size is returned
	 * unchanged.
	 */
	sector_t raw_sectors, user_sectors;

	if (USE_SUM_LAYOUT) {
		raw_sectors = POS2SEC(size);
		user_sectors = raw_sectors / (NR_SUM_SEC + 1) * NR_SUM_SEC;

		return SEC2POS(user_sectors);
	}

	return size;
}