2005-04-21 16:53:53 +02:00
|
|
|
/* The file system maintains a buffer cache to reduce the number of disk
|
|
|
|
* accesses needed. Whenever a read or write to the disk is done, a check is
|
|
|
|
* first made to see if the block is in the cache. This file manages the
|
|
|
|
* cache.
|
|
|
|
*
|
|
|
|
* The entry points into this file are:
|
|
|
|
* get_block: request to fetch a block for reading or writing from cache
|
|
|
|
* put_block: return a block previously requested with get_block
|
|
|
|
* alloc_zone: allocate a new zone (to increase the length of a file)
|
|
|
|
* free_zone: release a zone (when a file is removed)
|
|
|
|
* invalidate: remove all the cache blocks on some device
|
2005-10-12 17:06:47 +02:00
|
|
|
*
|
|
|
|
* Private functions:
|
|
|
|
* rw_block: read or write a block from the disk itself
|
2005-04-21 16:53:53 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "fs.h"
|
2006-11-27 15:21:43 +01:00
|
|
|
#include <minix/u64.h>
|
2011-02-28 15:19:19 +01:00
|
|
|
#include <sys/param.h>
|
2010-05-05 13:35:04 +02:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <assert.h>
|
2011-02-28 15:19:19 +01:00
|
|
|
#include <math.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
#include "buf.h"
|
|
|
|
#include "super.h"
|
2008-11-19 13:26:10 +01:00
|
|
|
#include "inode.h"
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
/* Prototypes for the file-local (PRIVATE) helpers defined further down. */
/* rm_lru: unlink a buffer from the LRU chain. */
FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
|
2010-06-01 14:35:33 +02:00
|
|
|
/* rw_block: read or write a single block from/to its device. */
FORWARD _PROTOTYPE( void rw_block, (struct buf *, int) );
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
/* Availability of the VM second-level block cache.  Starts out negative,
 * meaning "not probed yet"; get_block() probes once and stores 0 or 1.
 */
PRIVATE int vmcache_avail = -1; /* <0 not probed, 0 if not available, >0 if available. */
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* get_block *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC struct buf *get_block(
|
|
|
|
register dev_t dev, /* on which device is the block? */
|
|
|
|
register block_t block, /* which block is wanted? */
|
|
|
|
int only_search /* if NO_READ, don't read, else act normal */
|
|
|
|
)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Check to see if the requested block is in the block cache. If so, return
|
|
|
|
* a pointer to it. If not, evict some other block and fetch it (unless
|
|
|
|
* 'only_search' is 1). All the blocks in the cache that are not in use
|
|
|
|
* are linked together in a chain, with 'front' pointing to the least recently
|
|
|
|
* used block and 'rear' to the most recently used block. If 'only_search' is
|
|
|
|
* 1, the block being requested will be overwritten in its entirety, so it is
|
|
|
|
* only necessary to see if it is in the cache; if it is not, any free buffer
|
|
|
|
* will do. It is not necessary to actually read the block in from disk.
|
|
|
|
* If 'only_search' is PREFETCH, the block need not be read from the disk,
|
|
|
|
* and the device is not to be marked on the block, so callers can tell if
|
|
|
|
* the block returned is valid.
|
|
|
|
* In addition to the LRU chain, there is also a hash chain to link together
|
|
|
|
* blocks whose block numbers end with the same bit strings, for fast lookup.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int b;
|
2009-09-21 16:47:51 +02:00
|
|
|
static struct buf *bp, *prev_ptr;
|
2010-05-05 13:35:04 +02:00
|
|
|
u64_t yieldid = VM_BLOCKID_NONE, getid = make64(dev, block);
|
|
|
|
int vmcache = 0;
|
|
|
|
|
|
|
|
assert(buf_hash);
|
|
|
|
assert(buf);
|
|
|
|
assert(nr_bufs > 0);
|
|
|
|
|
|
|
|
if(vmcache_avail < 0) {
|
|
|
|
/* Test once for the availability of the vm yield block feature. */
|
|
|
|
if(vm_forgetblock(VM_BLOCKID_NONE) == ENOSYS) {
|
|
|
|
vmcache_avail = 0;
|
|
|
|
} else {
|
|
|
|
vmcache_avail = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* use vmcache if it's available, and allowed, and we're not doing
|
|
|
|
* i/o on a ram disk device.
|
|
|
|
*/
|
|
|
|
if(vmcache_avail && may_use_vmcache && major(dev) != MEMORY_MAJOR)
|
|
|
|
vmcache = 1;
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
ASSERT(fs_block_size > 0);
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Search the hash chain for (dev, block). Do_read() can use
|
|
|
|
* get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
|
|
|
|
* someone wants to read from a hole in a file, in which case this search
|
|
|
|
* is skipped
|
|
|
|
*/
|
|
|
|
if (dev != NO_DEV) {
|
2008-11-19 13:26:10 +01:00
|
|
|
b = BUFHASH(block);
|
2005-04-21 16:53:53 +02:00
|
|
|
bp = buf_hash[b];
|
2010-05-10 15:26:00 +02:00
|
|
|
while (bp != NULL) {
|
2005-04-21 16:53:53 +02:00
|
|
|
if (bp->b_blocknr == block && bp->b_dev == dev) {
|
|
|
|
/* Block needed has been found. */
|
|
|
|
if (bp->b_count == 0) rm_lru(bp);
|
|
|
|
bp->b_count++; /* record that block is in use */
|
2008-11-19 15:10:33 +01:00
|
|
|
ASSERT(bp->b_bytes == fs_block_size);
|
2008-11-19 13:26:10 +01:00
|
|
|
ASSERT(bp->b_dev == dev);
|
|
|
|
ASSERT(bp->b_dev != NO_DEV);
|
|
|
|
ASSERT(bp->bp);
|
2005-04-21 16:53:53 +02:00
|
|
|
return(bp);
|
|
|
|
} else {
|
|
|
|
/* This block is not the one sought. */
|
|
|
|
bp = bp->b_hash; /* move to next block on hash chain */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Desired block is not on available chain. Take oldest block ('front'). */
|
2010-05-10 15:26:00 +02:00
|
|
|
if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs);
|
2009-09-21 16:47:51 +02:00
|
|
|
|
|
|
|
if(bp->b_bytes < fs_block_size) {
|
|
|
|
ASSERT(!bp->bp);
|
|
|
|
ASSERT(bp->b_bytes == 0);
|
2010-06-01 14:35:33 +02:00
|
|
|
if(!(bp->bp = alloc_contig( (size_t) fs_block_size, 0, NULL))) {
|
2009-09-21 16:47:51 +02:00
|
|
|
printf("MFS: couldn't allocate a new block.\n");
|
|
|
|
for(bp = front;
|
|
|
|
bp && bp->b_bytes < fs_block_size; bp = bp->b_next)
|
|
|
|
;
|
|
|
|
if(!bp) {
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("no buffer available");
|
2009-09-21 16:47:51 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
bp->b_bytes = fs_block_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT(bp);
|
|
|
|
ASSERT(bp->bp);
|
|
|
|
ASSERT(bp->b_bytes == fs_block_size);
|
|
|
|
ASSERT(bp->b_count == 0);
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
rm_lru(bp);
|
|
|
|
|
|
|
|
/* Remove the block that was just taken from its hash chain. */
|
2008-11-19 13:26:10 +01:00
|
|
|
b = BUFHASH(bp->b_blocknr);
|
2005-04-21 16:53:53 +02:00
|
|
|
prev_ptr = buf_hash[b];
|
|
|
|
if (prev_ptr == bp) {
|
|
|
|
buf_hash[b] = bp->b_hash;
|
|
|
|
} else {
|
|
|
|
/* The block just taken is not on the front of its hash chain. */
|
2010-05-10 15:26:00 +02:00
|
|
|
while (prev_ptr->b_hash != NULL)
|
2005-04-21 16:53:53 +02:00
|
|
|
if (prev_ptr->b_hash == bp) {
|
|
|
|
prev_ptr->b_hash = bp->b_hash; /* found it */
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
prev_ptr = prev_ptr->b_hash; /* keep looking */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the block taken is dirty, make it clean by writing it to the disk.
|
|
|
|
* Avoid hysteresis by flushing all other dirty blocks for the same device.
|
|
|
|
*/
|
|
|
|
if (bp->b_dev != NO_DEV) {
|
|
|
|
if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
|
2010-05-05 13:35:04 +02:00
|
|
|
|
|
|
|
/* Are we throwing out a block that contained something?
|
|
|
|
* Give it to VM for the second-layer cache.
|
|
|
|
*/
|
|
|
|
yieldid = make64(bp->b_dev, bp->b_blocknr);
|
|
|
|
assert(bp->b_bytes == fs_block_size);
|
|
|
|
bp->b_dev = NO_DEV;
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Fill in block's parameters and add it to the hash chain where it goes. */
|
|
|
|
bp->b_dev = dev; /* fill in device number */
|
|
|
|
bp->b_blocknr = block; /* fill in block number */
|
|
|
|
bp->b_count++; /* record that block is being used */
|
2008-11-19 13:26:10 +01:00
|
|
|
b = BUFHASH(bp->b_blocknr);
|
2005-04-21 16:53:53 +02:00
|
|
|
bp->b_hash = buf_hash[b];
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
buf_hash[b] = bp; /* add to hash list */
|
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
if(dev == NO_DEV) {
|
|
|
|
if(vmcache && cmp64(yieldid, VM_BLOCKID_NONE) != 0) {
|
|
|
|
vm_yield_block_get_block(yieldid, VM_BLOCKID_NONE,
|
|
|
|
bp->bp, fs_block_size);
|
|
|
|
}
|
|
|
|
return(bp); /* If the caller wanted a NO_DEV block, work is done. */
|
|
|
|
}
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Go get the requested block unless searching or prefetching. */
|
2010-05-05 13:35:04 +02:00
|
|
|
if(only_search == PREFETCH || only_search == NORMAL) {
|
|
|
|
/* Block is not found in our cache, but we do want it
|
|
|
|
* if it's in the vm cache.
|
|
|
|
*/
|
|
|
|
if(vmcache) {
|
|
|
|
/* If we can satisfy the PREFETCH or NORMAL request
|
|
|
|
* from the vm cache, work is done.
|
|
|
|
*/
|
|
|
|
if(vm_yield_block_get_block(yieldid, getid,
|
|
|
|
bp->bp, fs_block_size) == OK) {
|
|
|
|
return bp;
|
|
|
|
}
|
2005-06-17 15:41:12 +02:00
|
|
|
}
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
if(only_search == PREFETCH) {
|
|
|
|
/* PREFETCH: don't do i/o. */
|
|
|
|
bp->b_dev = NO_DEV;
|
|
|
|
} else if (only_search == NORMAL) {
|
|
|
|
rw_block(bp, READING);
|
|
|
|
} else if(only_search == NO_READ) {
|
|
|
|
/* we want this block, but its contents
|
|
|
|
* will be overwritten. VM has to forget
|
|
|
|
* about it.
|
|
|
|
*/
|
|
|
|
if(vmcache) {
|
|
|
|
vm_forgetblock(getid);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
panic("unexpected only_search value: %d", only_search);
|
|
|
|
|
|
|
|
assert(bp->bp);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
return(bp); /* return the newly acquired block */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* put_block *
|
|
|
|
*===========================================================================*/
|
|
|
|
PUBLIC void put_block(bp, block_type)
|
|
|
|
register struct buf *bp; /* pointer to the buffer to be released */
|
|
|
|
int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
|
|
|
|
{
|
|
|
|
/* Return a block to the list of available blocks. Depending on 'block_type'
|
|
|
|
* it may be put on the front or rear of the LRU chain. Blocks that are
|
|
|
|
* expected to be needed again shortly (e.g., partially full data blocks)
|
|
|
|
* go on the rear; blocks that are unlikely to be needed again shortly
|
|
|
|
* (e.g., full data blocks) go on the front. Blocks whose loss can hurt
|
|
|
|
* the integrity of the file system (e.g., inode blocks) are written to
|
|
|
|
* disk immediately if they are dirty.
|
|
|
|
*/
|
2010-05-10 15:26:00 +02:00
|
|
|
if (bp == NULL) return; /* it is easier to check here than in caller */
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
bp->b_count--; /* there is one use fewer now */
|
|
|
|
if (bp->b_count != 0) return; /* block is still in use */
|
|
|
|
|
|
|
|
bufs_in_use--; /* one fewer block buffers in use */
|
|
|
|
|
|
|
|
/* Put this block back on the LRU chain. If the ONE_SHOT bit is set in
|
|
|
|
* 'block_type', the block is not likely to be needed again shortly, so put
|
|
|
|
* it on the front of the LRU chain where it will be the first one to be
|
|
|
|
* taken when a free buffer is needed later.
|
|
|
|
*/
|
2005-12-14 13:08:49 +01:00
|
|
|
if (bp->b_dev == DEV_RAM || (block_type & ONE_SHOT)) {
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Block probably won't be needed quickly. Put it on front of chain.
|
|
|
|
* It will be the next block to be evicted from the cache.
|
|
|
|
*/
|
2010-05-10 15:26:00 +02:00
|
|
|
bp->b_prev = NULL;
|
2005-04-21 16:53:53 +02:00
|
|
|
bp->b_next = front;
|
2010-05-10 15:26:00 +02:00
|
|
|
if (front == NULL)
|
2005-04-21 16:53:53 +02:00
|
|
|
rear = bp; /* LRU chain was empty */
|
|
|
|
else
|
|
|
|
front->b_prev = bp;
|
|
|
|
front = bp;
|
2006-10-25 15:40:36 +02:00
|
|
|
}
|
|
|
|
else {
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Block probably will be needed quickly. Put it on rear of chain.
|
|
|
|
* It will not be evicted from the cache for a long time.
|
|
|
|
*/
|
|
|
|
bp->b_prev = rear;
|
2010-05-10 15:26:00 +02:00
|
|
|
bp->b_next = NULL;
|
|
|
|
if (rear == NULL)
|
2005-04-21 16:53:53 +02:00
|
|
|
front = bp;
|
|
|
|
else
|
|
|
|
rear->b_next = bp;
|
|
|
|
rear = bp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* alloc_zone *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC zone_t alloc_zone(
  dev_t dev,			/* device where zone wanted */
  zone_t z			/* try to allocate new zone near this one */
)
{
/* Allocate a zone on device 'dev', preferably close to zone 'z', and return
 * its zone number.  On failure err_code is set to ENOSPC, a message is
 * printed and NO_ZONE is returned.
 *
 * Zone numbers and zone map bit numbers are related by
 *	zone = bit + s_firstdatazone - 1
 * Bit 1 is the first data zone; alloc_bit() reports failure with NO_BIT.
 */
  int dev_major, dev_minor;
  bit_t start, found;
  struct super_block *sp;

  sp = get_super(dev);

  /* A request near the very first data zone starts at the search hint kept
   * in the super block; any other request starts at the bit for 'z'.
   */
  start = (z == sp->s_firstdatazone)
	? sp->s_zsearch
	: (bit_t) (z - (sp->s_firstdatazone - 1));

  found = alloc_bit(sp, ZMAP, start);
  if (found == NO_BIT) {
	err_code = ENOSPC;
	dev_major = (int) (sp->s_dev >> MAJOR) & BYTE;
	dev_minor = (int) (sp->s_dev >> MINOR) & BYTE;
	printf("No space on device %d/%d\n", dev_major, dev_minor);
	return(NO_ZONE);
  }

  /* Remember where the search ended, for next time. */
  if (z == sp->s_firstdatazone) sp->s_zsearch = found;

  return( (zone_t) (sp->s_firstdatazone - 1) + (zone_t) found);
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* free_zone *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC void free_zone(
|
|
|
|
dev_t dev, /* device where zone located */
|
|
|
|
zone_t numb /* zone to be returned */
|
|
|
|
)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Return a zone. */
|
|
|
|
|
|
|
|
register struct super_block *sp;
|
|
|
|
bit_t bit;
|
|
|
|
|
|
|
|
/* Locate the appropriate super_block and return bit. */
|
|
|
|
sp = get_super(dev);
|
|
|
|
if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
|
2010-06-01 14:35:33 +02:00
|
|
|
bit = (bit_t) (numb - (zone_t) (sp->s_firstdatazone - 1));
|
2005-04-21 16:53:53 +02:00
|
|
|
free_bit(sp, ZMAP, bit);
|
|
|
|
if (bit < sp->s_zsearch) sp->s_zsearch = bit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* rw_block *
|
|
|
|
*===========================================================================*/
|
2010-06-01 14:35:33 +02:00
|
|
|
PRIVATE void rw_block(bp, rw_flag)
|
2005-04-21 16:53:53 +02:00
|
|
|
register struct buf *bp; /* buffer pointer */
|
|
|
|
int rw_flag; /* READING or WRITING */
|
|
|
|
{
|
|
|
|
/* Read or write a disk block. This is the only routine in which actual disk
|
|
|
|
* I/O is invoked. If an error occurs, a message is printed here, but the error
|
|
|
|
* is not reported to the caller. If the error occurred while purging a block
|
|
|
|
* from the cache, it is not clear what the caller could do about it anyway.
|
|
|
|
*/
|
2010-06-01 14:35:33 +02:00
|
|
|
int r, op, op_failed;
|
2006-11-27 15:21:43 +01:00
|
|
|
u64_t pos;
|
2005-04-21 16:53:53 +02:00
|
|
|
dev_t dev;
|
|
|
|
|
2010-06-01 14:35:33 +02:00
|
|
|
op_failed = 0;
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
if ( (dev = bp->b_dev) != NO_DEV) {
|
2010-06-01 14:35:33 +02:00
|
|
|
pos = mul64u(bp->b_blocknr, fs_block_size);
|
|
|
|
op = (rw_flag == READING ? MFS_DEV_READ : MFS_DEV_WRITE);
|
|
|
|
r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size);
|
|
|
|
if (r < 0) {
|
|
|
|
printf("MFS(%d) I/O error on device %d/%d, block %lu\n",
|
|
|
|
SELF_E, major(dev), minor(dev), bp->b_blocknr);
|
|
|
|
op_failed = 1;
|
|
|
|
} else if( (unsigned) r != fs_block_size) {
|
|
|
|
r = END_OF_FILE;
|
|
|
|
op_failed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (op_failed) {
|
|
|
|
bp->b_dev = NO_DEV; /* invalidate block */
|
|
|
|
|
|
|
|
/* Report read errors to interested parties. */
|
|
|
|
if (rw_flag == READING) rdwt_err = r;
|
|
|
|
}
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bp->b_dirt = CLEAN;
|
2006-03-15 16:34:12 +01:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* invalidate *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC void invalidate(
|
|
|
|
dev_t device /* device whose blocks are to be purged */
|
|
|
|
)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Remove all the blocks belonging to some device from the cache. */
|
|
|
|
|
|
|
|
register struct buf *bp;
|
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
|
2005-04-21 16:53:53 +02:00
|
|
|
if (bp->b_dev == device) bp->b_dev = NO_DEV;
|
2010-05-05 13:35:04 +02:00
|
|
|
|
|
|
|
vm_forgetblocks();
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
2005-09-11 18:45:46 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* flushall *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC void flushall(
|
|
|
|
dev_t dev /* device to flush */
|
|
|
|
)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Flush all dirty blocks for one device. */
|
|
|
|
|
|
|
|
register struct buf *bp;
|
2008-11-19 13:26:10 +01:00
|
|
|
static struct buf **dirty; /* static so it isn't on stack */
|
2010-06-01 14:35:33 +02:00
|
|
|
static unsigned int dirtylistsize = 0;
|
2005-04-21 16:53:53 +02:00
|
|
|
int ndirty;
|
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
if(dirtylistsize != nr_bufs) {
|
2010-06-01 14:35:33 +02:00
|
|
|
if(dirtylistsize > 0) {
|
|
|
|
assert(dirty != NULL);
|
2010-05-05 13:35:04 +02:00
|
|
|
free(dirty);
|
2010-06-01 14:35:33 +02:00
|
|
|
}
|
2010-05-05 13:35:04 +02:00
|
|
|
if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs)))
|
|
|
|
panic("couldn't allocate dirty buf list");
|
|
|
|
dirtylistsize = nr_bufs;
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++)
|
2005-04-21 16:53:53 +02:00
|
|
|
if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp;
|
|
|
|
rw_scattered(dev, dirty, ndirty, WRITING);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* rw_scattered *
|
|
|
|
*===========================================================================*/
|
2010-04-13 12:58:41 +02:00
|
|
|
PUBLIC void rw_scattered(
|
|
|
|
dev_t dev, /* major-minor device number */
|
|
|
|
struct buf **bufq, /* pointer to array of buffers */
|
|
|
|
int bufqsize, /* number of buffers */
|
|
|
|
int rw_flag /* READING or WRITING */
|
|
|
|
)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* Read or write scattered data from a device. */
|
|
|
|
|
|
|
|
register struct buf *bp;
|
|
|
|
int gap;
|
|
|
|
register int i;
|
|
|
|
register iovec_t *iop;
|
2008-11-19 13:26:10 +01:00
|
|
|
static iovec_t *iovec = NULL;
|
2005-04-21 16:53:53 +02:00
|
|
|
int j, r;
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
STATICINIT(iovec, NR_IOREQS);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
/* (Shell) sort buffers on b_blocknr. */
|
|
|
|
gap = 1;
|
|
|
|
do
|
|
|
|
gap = 3 * gap + 1;
|
|
|
|
while (gap <= bufqsize);
|
|
|
|
while (gap != 1) {
|
|
|
|
gap /= 3;
|
|
|
|
for (j = gap; j < bufqsize; j++) {
|
|
|
|
for (i = j - gap;
|
|
|
|
i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
|
|
|
|
i -= gap) {
|
|
|
|
bp = bufq[i];
|
|
|
|
bufq[i] = bufq[i + gap];
|
|
|
|
bufq[i + gap] = bp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set up I/O vector and do I/O. The result of dev_io is OK if everything
|
|
|
|
* went fine, otherwise the error code for the first failed transfer.
|
|
|
|
*/
|
|
|
|
while (bufqsize > 0) {
|
|
|
|
for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
|
|
|
|
bp = bufq[j];
|
2010-06-01 14:35:33 +02:00
|
|
|
if (bp->b_blocknr != (block_t) bufq[0]->b_blocknr + j) break;
|
2005-04-21 16:53:53 +02:00
|
|
|
iop->iov_addr = (vir_bytes) bp->b_data;
|
2010-06-01 14:35:33 +02:00
|
|
|
iop->iov_size = (vir_bytes) fs_block_size;
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
2007-02-07 17:22:19 +01:00
|
|
|
r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER,
|
2006-10-25 15:40:36 +02:00
|
|
|
dev, SELF_E, iovec,
|
2010-06-01 14:35:33 +02:00
|
|
|
mul64u(bufq[0]->b_blocknr, fs_block_size), j);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
/* Harvest the results. Dev_io reports the first error it may have
|
|
|
|
* encountered, but we only care if it's the first block that failed.
|
|
|
|
*/
|
|
|
|
for (i = 0, iop = iovec; i < j; i++, iop++) {
|
|
|
|
bp = bufq[i];
|
|
|
|
if (iop->iov_size != 0) {
|
|
|
|
/* Transfer failed. An error? Do we care? */
|
|
|
|
if (r != OK && i == 0) {
|
|
|
|
printf(
|
|
|
|
"fs: I/O error on device %d/%d, block %lu\n",
|
2010-06-01 14:35:33 +02:00
|
|
|
major(dev), minor(dev), bp->b_blocknr);
|
2005-04-21 16:53:53 +02:00
|
|
|
bp->b_dev = NO_DEV; /* invalidate block */
|
2010-05-05 13:35:04 +02:00
|
|
|
vm_forgetblocks();
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (rw_flag == READING) {
|
|
|
|
bp->b_dev = dev; /* validate block */
|
|
|
|
put_block(bp, PARTIAL_DATA_BLOCK);
|
|
|
|
} else {
|
|
|
|
bp->b_dirt = CLEAN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bufq += i;
|
|
|
|
bufqsize -= i;
|
|
|
|
if (rw_flag == READING) {
|
|
|
|
/* Don't bother reading more than the device is willing to
|
|
|
|
* give at this time. Don't forget to release those extras.
|
|
|
|
*/
|
|
|
|
while (bufqsize > 0) {
|
|
|
|
put_block(*bufq++, PARTIAL_DATA_BLOCK);
|
|
|
|
bufqsize--;
|
|
|
|
}
|
|
|
|
}
|
2005-09-11 18:45:46 +02:00
|
|
|
if (rw_flag == WRITING && i == 0) {
|
Fixed nasty bug that would cause FS to loop forever in rw_scattered (while
syncing, for instance) if blocks are unwritable. This could happen if:
. write goes beyond device boundary to a block device
. write is done for a complete block or more; thus the
block is not retrieved first (at which point it would be noticed
it doesn't exist), but the buffer is simply allocated
. at write time, the device i/o doesn't succeed, but rw_scattered
doesn't understand this and loops forever trying to get the block
written.
Currently, if no blocks can be written, the loop aborts, leaving all
buffers intact but potentially dirty. When invalidate() is called on the
device, the buffers will disappear (even if dirty). Same story for if
the buffer is removed due to rmed from lru chain. There's not much we
can do about this, however - we can't keep these blocks around, forever
occupying a buffer in the buffer cache.
The second part of the solution is not to let unwritable buffers be
created in the first place. How to do this, however, without doing a
wasteful read first?
It looks like this code was in 2.0.4 too.
2005-06-20 00:33:49 +02:00
|
|
|
/* We're not making progress, this means we might keep
|
|
|
|
* looping. Buffers remain dirty if un-written. Buffers are
|
|
|
|
* lost if invalidate()d or LRU-removed while dirty. This
|
|
|
|
* is better than keeping unwritable blocks around forever..
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
}
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* rm_lru *
|
|
|
|
*===========================================================================*/
|
|
|
|
PRIVATE void rm_lru(bp)
|
|
|
|
struct buf *bp;
|
|
|
|
{
|
|
|
|
/* Remove a block from its LRU chain. */
|
|
|
|
struct buf *next_ptr, *prev_ptr;
|
|
|
|
|
|
|
|
bufs_in_use++;
|
|
|
|
next_ptr = bp->b_next; /* successor on LRU chain */
|
|
|
|
prev_ptr = bp->b_prev; /* predecessor on LRU chain */
|
2010-05-10 15:26:00 +02:00
|
|
|
if (prev_ptr != NULL)
|
2005-04-21 16:53:53 +02:00
|
|
|
prev_ptr->b_next = next_ptr;
|
|
|
|
else
|
|
|
|
front = next_ptr; /* this block was at front of chain */
|
|
|
|
|
2010-05-10 15:26:00 +02:00
|
|
|
if (next_ptr != NULL)
|
2005-04-21 16:53:53 +02:00
|
|
|
next_ptr->b_prev = prev_ptr;
|
|
|
|
else
|
|
|
|
rear = prev_ptr; /* this block was at rear of chain */
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/*===========================================================================*
|
2011-02-28 15:19:19 +01:00
|
|
|
* cache_resize *
|
2008-11-19 13:26:10 +01:00
|
|
|
*===========================================================================*/
|
2011-02-28 15:19:19 +01:00
|
|
|
PRIVATE void cache_resize(unsigned int blocksize, unsigned int bufs)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
2010-06-01 14:35:33 +02:00
|
|
|
struct buf *bp;
|
|
|
|
struct inode *rip;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2011-02-28 15:19:19 +01:00
|
|
|
#define MINBUFS 10
|
|
|
|
assert(blocksize > 0);
|
|
|
|
assert(bufs >= MINBUFS);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-06-01 14:35:33 +02:00
|
|
|
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
|
|
|
|
if(bp->b_count != 0) panic("change blocksize with buffer in use");
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-06-01 14:35:33 +02:00
|
|
|
for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
|
|
|
|
if (rip->i_count > 0) panic("change blocksize with inode in use");
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2011-02-28 15:19:19 +01:00
|
|
|
buf_pool(bufs);
|
|
|
|
|
2010-06-01 14:35:33 +02:00
|
|
|
fs_block_size = blocksize;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
2011-02-28 15:19:19 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* bufs_heuristic *
|
|
|
|
*===========================================================================*/
|
|
|
|
PRIVATE int bufs_heuristic(struct super_block *sp)
|
|
|
|
{
|
|
|
|
struct vm_stats_info vsi;
|
2011-06-09 16:09:13 +02:00
|
|
|
int bufs;
|
2011-02-28 15:19:19 +01:00
|
|
|
u32_t btotal, bfree, bused, kbytes_used_fs,
|
|
|
|
kbytes_total_fs, kbcache, kb_fsmax;
|
|
|
|
u32_t kbytes_remain_mem;
|
|
|
|
|
|
|
|
/* but we simply need MINBUFS no matter what, and we don't
|
|
|
|
* want more than that if we're a memory device
|
|
|
|
*/
|
|
|
|
if(major(sp->s_dev) == MEMORY_MAJOR) {
|
|
|
|
bufs = MINBUFS;
|
|
|
|
return bufs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* set a reasonable cache size; cache at most a certain
|
|
|
|
* portion of the used FS, and at most a certain %age of remaining
|
|
|
|
* memory
|
|
|
|
*/
|
|
|
|
if((vm_info_stats(&vsi) != OK)) {
|
|
|
|
bufs = 1024;
|
|
|
|
printf("mfs: heuristic info fail: default to %d bufs\n", bufs);
|
|
|
|
return bufs;
|
|
|
|
}
|
|
|
|
|
|
|
|
kbytes_remain_mem = div64u(mul64u(vsi.vsi_free, vsi.vsi_pagesize), 1024);
|
|
|
|
|
|
|
|
/* check fs usage. */
|
|
|
|
blockstats(&btotal, &bfree, &bused);
|
|
|
|
kbytes_used_fs = div64u(mul64u(bused, sp->s_block_size), 1024);
|
|
|
|
kbytes_total_fs = div64u(mul64u(btotal, sp->s_block_size), 1024);
|
|
|
|
|
|
|
|
/* heuristic for a desired cache size based on FS usage;
|
|
|
|
* but never bigger than half of the total filesystem
|
|
|
|
*/
|
2011-07-04 02:51:12 +02:00
|
|
|
kb_fsmax = sqrt_approx(kbytes_used_fs)*40;
|
2011-02-28 15:19:19 +01:00
|
|
|
kb_fsmax = MIN(kb_fsmax, kbytes_total_fs/2);
|
|
|
|
|
|
|
|
/* heuristic for a maximum usage - 10% of remaining memory */
|
|
|
|
kbcache = MIN(kbytes_remain_mem/10, kb_fsmax);
|
|
|
|
bufs = kbcache * 1024 / sp->s_block_size;
|
|
|
|
|
|
|
|
/* but we simply need MINBUFS no matter what */
|
|
|
|
if(bufs < MINBUFS)
|
|
|
|
bufs = MINBUFS;
|
|
|
|
|
|
|
|
return bufs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* set_blocksize *
|
|
|
|
*===========================================================================*/
|
|
|
|
PUBLIC void set_blocksize(struct super_block *sp)
|
|
|
|
{
|
|
|
|
int bufs;
|
|
|
|
|
|
|
|
cache_resize(sp->s_block_size, MINBUFS);
|
|
|
|
bufs = bufs_heuristic(sp);
|
|
|
|
cache_resize(sp->s_block_size, bufs);
|
|
|
|
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* buf_pool *
|
|
|
|
*===========================================================================*/
|
2010-05-05 13:35:04 +02:00
|
|
|
PUBLIC void buf_pool(int new_nr_bufs)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
|
|
|
/* Initialize the buffer pool. */
|
|
|
|
register struct buf *bp;
|
|
|
|
|
2011-02-28 15:19:19 +01:00
|
|
|
assert(new_nr_bufs >= MINBUFS);
|
2010-05-05 13:35:04 +02:00
|
|
|
|
|
|
|
if(nr_bufs > 0) {
|
|
|
|
assert(buf);
|
2010-06-01 14:35:33 +02:00
|
|
|
(void) fs_sync();
|
2010-05-05 13:35:04 +02:00
|
|
|
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
|
|
|
|
if(bp->bp) {
|
|
|
|
assert(bp->b_bytes > 0);
|
|
|
|
free_contig(bp->bp, bp->b_bytes);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(buf)
|
|
|
|
free(buf);
|
|
|
|
|
|
|
|
if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs)))
|
|
|
|
panic("couldn't allocate buf list (%d)", new_nr_bufs);
|
|
|
|
|
|
|
|
if(buf_hash)
|
|
|
|
free(buf_hash);
|
|
|
|
if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs)))
|
|
|
|
panic("couldn't allocate buf hash list (%d)", new_nr_bufs);
|
|
|
|
|
|
|
|
nr_bufs = new_nr_bufs;
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
bufs_in_use = 0;
|
|
|
|
front = &buf[0];
|
2010-05-05 13:35:04 +02:00
|
|
|
rear = &buf[nr_bufs - 1];
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
|
2008-11-19 13:26:10 +01:00
|
|
|
bp->b_blocknr = NO_BLOCK;
|
|
|
|
bp->b_dev = NO_DEV;
|
|
|
|
bp->b_next = bp + 1;
|
|
|
|
bp->b_prev = bp - 1;
|
|
|
|
bp->bp = NULL;
|
|
|
|
bp->b_bytes = 0;
|
|
|
|
}
|
2010-06-01 14:35:33 +02:00
|
|
|
front->b_prev = NULL;
|
|
|
|
rear->b_next = NULL;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->b_hash = bp->b_next;
|
2008-11-19 13:26:10 +01:00
|
|
|
buf_hash[0] = front;
|
|
|
|
|
2010-05-05 13:35:04 +02:00
|
|
|
vm_forgetblocks();
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|