Add support for the ext2 file system. Contributed by Evgeniy Ivanov.

This commit is contained in:
Thomas Veerman 2010-08-02 11:44:45 +00:00
parent 253b4b3fe5
commit dff0434eaf
34 changed files with 7699 additions and 1 deletions

View file

@ -1,3 +1,7 @@
20100802:
/usr/src/etc/system.conf updated to include ext2 file server: copy it
(or merge it) to /etc/system.conf.
20100719:
If you installed using a 3.1.6 image (or earlier) and haven't updated
the boot monitor since r6246 you need to do so now:

View file

@ -119,6 +119,19 @@ service mfs
quantum 500; # default server quantum
};
service ext2
{
ipc ALL; # ALL ipc targets allowed
system BASIC; # Only basic kernel calls allowed
vm BASIC; # Only basic VM calls allowed
io NONE; # No I/O range allowed
irq NONE; # No IRQ allowed
sigmgr rs; # Signal manager is RS
scheduler sched; # Scheduler is sched
priority 5; # priority queue 5
quantum 500; # default server quantum
};
service pfs
{
uid 0;

View file

@ -3,7 +3,7 @@
.include <bsd.own.mk>
SUBDIR= ds hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm

19
servers/ext2/Makefile Normal file
View file

@ -0,0 +1,19 @@
# Makefile for ext2 filesystem
PROG= ext2
SRCS= balloc.c cache.c device.c link.c \
mount.c misc.c open.c protect.c read.c \
stadir.c table.c time.c utility.c \
write.c ialloc.c inode.c main.c path.c \
super.c optset.c
DPADD+= ${LIBSYS}
LDADD+= -lsys
MAN=
BINDIR?= /sbin
INSTALLFLAGS+= -S 128k
DEFAULT_NR_BUFS= 1024
CPPFLAGS+= -DDEFAULT_NR_BUFS=${DEFAULT_NR_BUFS}
.include <bsd.prog.mk>

354
servers/ext2/balloc.c Normal file
View file

@ -0,0 +1,354 @@
/* This file manages block allocation and deallocation.
*
* The entry points into this file are:
* discard_preallocated_blocks: Discard preallocated blocks.
* alloc_block: somebody wants to allocate a block; find one.
* free_block: indicate that a block is available for new allocation.
*
* Created:
* June 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <string.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include "const.h"
FORWARD _PROTOTYPE( block_t alloc_block_bit, (struct super_block *sp,
block_t origin,
struct inode *rip));
/*===========================================================================*
* discard_preallocated_blocks *
*===========================================================================*/
PUBLIC void discard_preallocated_blocks(struct inode *rip)
{
/* When called for rip, discard (free) blocks preallocated for rip,
* otherwise discard all preallocated blocks.
* Normally it should be called in following situations:
* 1. File is closed.
* 2. File is truncated.
* 3. Non-sequential write.
* 4. inode is "unloaded" from the memory.
* 5. No free blocks left (discard all preallocated blocks).
*/
int i;
if (rip) {
rip->i_prealloc_count = rip->i_prealloc_index = 0;
for (i = 0; i < EXT2_PREALLOC_BLOCKS; i++) {
if (rip->i_prealloc_blocks[i] != NO_BLOCK) {
free_block(rip->i_sp, rip->i_prealloc_blocks[i]);
rip->i_prealloc_blocks[i] = NO_BLOCK;
}
}
return;
}
/* Discard all allocated blocks.
* Probably there are just few blocks on the disc, so forbid preallocation.*/
for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++) {
rip->i_prealloc_count = rip->i_prealloc_index = 0;
rip->i_preallocation = 0; /* forbid preallocation */
for (i = 0; i < EXT2_PREALLOC_BLOCKS; i++) {
if (rip->i_prealloc_blocks[i] != NO_BLOCK) {
free_block(rip->i_sp, rip->i_prealloc_blocks[i]);
rip->i_prealloc_blocks[i] = NO_BLOCK;
}
}
}
}
/*===========================================================================*
* alloc_block *
*===========================================================================*/
PUBLIC block_t alloc_block(struct inode *rip, block_t block)
{
/* Allocate one block on behalf of inode 'rip' and return its number, or
 * NO_BLOCK when the file system has no block to hand out.
 *
 * 'block' is the allocation goal: when it is not NO_BLOCK the allocator tries
 * that block or its neighbours; otherwise the goal is the first block of the
 * group the inode lives in.  On success rip->i_bsearch records the result.
 * Panics when the file system is mounted read-only.
 */
  struct super_block *sp = rip->i_sp;
  block_t goal;
  block_t b;

  if (sp->s_rd_only)
	panic("can't alloc block on read-only filesys.");

  /* Running low on space: hand every preallocated block back first ... */
  if ((!opt.use_reserved_blocks &&
       sp->s_free_blocks_count <= sp->s_r_blocks_count) ||
      sp->s_free_blocks_count <= EXT2_PREALLOC_BLOCKS) {
	discard_preallocated_blocks(NULL);
  }

  /* ... and give up if that still did not free anything usable. */
  if ((!opt.use_reserved_blocks &&
       sp->s_free_blocks_count <= sp->s_r_blocks_count) ||
      sp->s_free_blocks_count == 0) {
	return(NO_BLOCK);
  }

  if (block == NO_BLOCK) {
	/* No goal given: aim at the start of the inode's own group. */
	int group = (rip->i_num - 1) / sp->s_inodes_per_group;
	goal = sp->s_blocks_per_group*group + sp->s_first_data_block;
  } else {
	goal = block;
	if (rip->i_preallocation && rip->i_prealloc_count > 0) {
		/* Is the goal (or its successor) the next preallocated block? */
		b = rip->i_prealloc_blocks[rip->i_prealloc_index];
		if (block != b && (block + 1) != b) {
			/* Looks like a non-sequential write: stop
			 * preallocating for this inode and release what
			 * was set aside.
			 */
			rip->i_preallocation = 0;
			discard_preallocated_blocks(rip);
		} else {
			/* Consume the preallocated block. */
			rip->i_prealloc_blocks[rip->i_prealloc_index] = NO_BLOCK;
			rip->i_prealloc_count--;
			rip->i_prealloc_index++;
			if (rip->i_prealloc_index >= EXT2_PREALLOC_BLOCKS) {
				rip->i_prealloc_index = 0;
				ASSERT(rip->i_prealloc_count == 0);
			}
			rip->i_bsearch = b;
			return b;
		}
	}
  }

  if (rip->i_preallocation && rip->i_prealloc_count) {
	ext2_debug("There're preallocated blocks, but they're\
neither used or freed!");
  }

  /* Fall back to a bitmap search starting at the goal. */
  b = alloc_block_bit(sp, goal, rip);
  if (b != NO_BLOCK)
	rip->i_bsearch = b;

  return b;
}
FORWARD _PROTOTYPE( void check_block_number, (block_t block,
struct super_block *sp,
struct group_desc *gd) );
/*===========================================================================*
* alloc_block_bit *
*===========================================================================*/
PRIVATE block_t alloc_block_bit(sp, goal, rip)
struct super_block *sp; /* the filesystem to allocate from */
block_t goal; /* try to allocate near this block */
struct inode *rip; /* used for preallocation */
{
/* Search the block bitmaps for a free block, starting in the group that
 * contains 'goal', and return the allocated block number (NO_BLOCK if every
 * group was visited without success).
 *
 * When preallocation is enabled for 'rip' and the group has plenty of free
 * blocks, a whole byte of the bitmap (EXT2_PREALLOC_BLOCKS blocks) is claimed:
 * the first block is returned and the others are remembered in
 * rip->i_prealloc_blocks.
 *
 * Every bitmap buffer obtained with get_block() is released with put_block()
 * on all paths that leave an iteration, including the "retry this group
 * from word 0 later" path.
 */
  block_t block = NO_BLOCK; /* allocated block */
  int word; /* word in block bitmap */
  bit_t bit = -1;
  int group;
  char update_bsearch = FALSE;
  int i;
  int k; /* index for the preallocation slots; keeps 'i' intact */

  if (goal >= sp->s_blocks_count ||
      (goal < sp->s_first_data_block && goal != 0)) {
	goal = sp->s_bsearch;
  }

  if (goal <= sp->s_bsearch) {
	/* No reason to search in a place with no free blocks */
	goal = sp->s_bsearch;
	update_bsearch = TRUE;
  }

  /* Figure out where to start the bit search. */
  word = ((goal - sp->s_first_data_block) % sp->s_blocks_per_group)
	/ FS_BITCHUNK_BITS;

  /* Try to allocate block at any group starting from the goal's group.
   * First time goal's group is checked from the word=goal, after all
   * groups checked, it's checked again from word=0, that's why "i <=".
   */
  group = (goal - sp->s_first_data_block) / sp->s_blocks_per_group;
  for (i = 0; i <= sp->s_groups_count; i++, group++) {
	struct buf *bp;
	struct group_desc *gd;

	if (group >= sp->s_groups_count)
		group = 0;

	gd = get_group_desc(group);
	if (gd == NULL)
		panic("can't get group_desc to alloc block");

	if (gd->free_blocks_count == 0) {
		/* Nothing here; later groups are scanned from their start. */
		word = 0;
		continue;
	}

	bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL);

	if (rip->i_preallocation &&
	    gd->free_blocks_count >= (EXT2_PREALLOC_BLOCKS * 4) ) {
		/* Try to preallocate blocks */
		if (rip->i_prealloc_count != 0) {
			/* kind of glitch... */
			discard_preallocated_blocks(rip);
			ext2_debug("warning, discarding previously preallocated\
blocks! It had to be done by another code.");
		}
		ASSERT(rip->i_prealloc_count == 0);
		/* we preallocate bytes only */
		ASSERT(EXT2_PREALLOC_BLOCKS == sizeof(char)*CHAR_BIT);

		bit = setbyte(bp->b_bitmap, sp->s_blocks_per_group, word);
		if (bit != -1) {
			block = bit + sp->s_first_data_block +
				group * sp->s_blocks_per_group;
			check_block_number(block, sp, gd);

			/* We preallocate a byte starting from block.
			 * First preallocated block will be returned as
			 * normally allocated block.
			 */
			for (k = 1; k < EXT2_PREALLOC_BLOCKS; k++) {
				check_block_number(block + k, sp, gd);
				rip->i_prealloc_blocks[k-1] = block + k;
			}
			rip->i_prealloc_index = 0;
			rip->i_prealloc_count = EXT2_PREALLOC_BLOCKS - 1;

			bp->b_dirt = DIRTY; /* by setbyte */
			put_block(bp, MAP_BLOCK);

			gd->free_blocks_count -= EXT2_PREALLOC_BLOCKS;
			sp->s_free_blocks_count -= EXT2_PREALLOC_BLOCKS;
			group_descriptors_dirty = DIRTY;
			return block;
		}
		/* No free byte available: fall back to a single bit below. */
	}

	bit = setbit(bp->b_bitmap, sp->s_blocks_per_group, word);
	if (bit == -1) {
		if (word == 0) {
			panic("ext2: allocator failed to allocate a bit in bitmap\
with free bits.");
		} else {
			/* The free bits of this group lie before 'word'; the
			 * group is revisited from word 0 at the end of the
			 * scan.  The bitmap was not modified, but the buffer
			 * must still be released or it stays pinned in the
			 * cache forever.
			 */
			put_block(bp, MAP_BLOCK);
			word = 0;
			continue;
		}
	}

	block = sp->s_first_data_block + group * sp->s_blocks_per_group + bit;
	check_block_number(block, sp, gd);

	bp->b_dirt = DIRTY; /* Now it's safe to mark it as dirty */
	put_block(bp, MAP_BLOCK);

	gd->free_blocks_count--;
	sp->s_free_blocks_count--;
	group_descriptors_dirty = DIRTY;

	if (update_bsearch && block != -1 && block != NO_BLOCK) {
		/* We searched from the beginning, update bsearch. */
		sp->s_bsearch = block;
	}

	return block;
  }

  return block;
}
/*===========================================================================*
* free_block *
*===========================================================================*/
PUBLIC void free_block(struct super_block *sp, bit_t bit_returned)
{
/* Return block 'bit_returned' to the free pool of file system 'sp' by
 * clearing its bit in the block bitmap of the group it belongs to, and
 * update the free-block counters of the group and the superblock.
 *
 * Panics when the file system is read-only, when the block number is outside
 * [s_first_data_block, s_blocks_count), when it designates a bitmap or
 * inode-table block, or when the bit was not set in the first place.
 */
  int group; /* group number of bit_returned */
  int bit; /* bit_returned number within its group */
  struct buf *bp;
  struct group_desc *gd;

  if (sp->s_rd_only)
	panic("can't free bit on read-only filesys.");

  if (bit_returned >= sp->s_blocks_count ||
      bit_returned < sp->s_first_data_block)
	panic("trying to free block %d beyond blocks scope.",
		bit_returned);

  /* At first search group, to which bit_returned belongs to
   * and figure out in what word bit is stored.
   */
  group = (bit_returned - sp->s_first_data_block) / sp->s_blocks_per_group;
  bit = (bit_returned - sp->s_first_data_block) % sp->s_blocks_per_group;

  gd = get_group_desc(group);
  if (gd == NULL)
	panic("can't get group_desc to free block");

  /* We might be buggy (No way! :P), so check if we deallocate
   * data block, but not control (system) block.
   * This should never happen.
   */
  if (bit_returned == gd->inode_bitmap || bit_returned == gd->block_bitmap
      || (bit_returned >= gd->inode_table
          && bit_returned < (gd->inode_table + sp->s_itb_per_group))) {
	ext2_debug("ext2: freeing non-data block %d\n", bit_returned);
	panic("trying to deallocate \
system/control block, hardly poke author.");
  }

  bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL);

  /* A non-zero result from unsetbit() is treated as "bit was not set". */
  if (unsetbit(bp->b_bitmap, bit))
	panic("Tried to free unused block %d", bit_returned);

  bp->b_dirt = DIRTY;
  put_block(bp, MAP_BLOCK);

  gd->free_blocks_count++;
  sp->s_free_blocks_count++;
  group_descriptors_dirty = DIRTY;

  /* Remember the lowest block known to be free to speed up later searches. */
  if (bit_returned < sp->s_bsearch)
	sp->s_bsearch = bit_returned;
}
PRIVATE void check_block_number(block_t block, struct super_block *sp,
				struct group_desc *gd)
{
/* Sanity check on a block number the allocator is about to hand out.
 * The block must be a data block: not the group's inode bitmap, not its
 * block bitmap, not part of its inode table, and below the total block count.
 * Any violation means the allocator itself is broken, so we panic instead of
 * corrupting the file system's bitmaps.
 */
  int is_bitmap;	/* block is one of the group's two bitmaps */
  int in_inode_table;	/* block lies inside the group's inode table */

  is_bitmap = (block == gd->inode_bitmap) || (block == gd->block_bitmap);
  in_inode_table = (block >= gd->inode_table)
	&& (block < (gd->inode_table + sp->s_itb_per_group));

  if (is_bitmap || in_inode_table) {
	ext2_debug("ext2: allocating non-data block %d\n", block);
	panic("ext2: block allocator tryed to return \
system/control block, poke author.\n");
  }

  if (block >= sp->s_blocks_count) {
	panic("ext2: allocator returned blocknum greater, than \
total number of blocks.\n");
  }
}

53
servers/ext2/buf.h Normal file
View file

@ -0,0 +1,53 @@
/* Buffer (block) cache. To acquire a block, a routine calls get_block(),
* telling which block it wants. The block is then regarded as "in use"
* and has its 'b_count' field incremented. All the blocks that are not
* in use are chained together in an LRU list, with 'front' pointing
* to the least recently used block, and 'rear' to the most recently used
* block. A reverse chain, using the field b_prev is also maintained.
* Usage for LRU is measured by the time the put_block() is done. The second
* parameter to put_block() can violate the LRU order and put a block on the
* front of the list, if it will probably not be needed soon. If a block
* is modified, the modifying routine must set b_dirt to DIRTY, so the block
* will eventually be rewritten to the disk.
*/
#ifndef EXT2_BUF_H
#define EXT2_BUF_H
#include <sys/dir.h> /* need struct direct */
#include <dirent.h>
/* The contents of one cached block, viewed in the three ways this server
 * interprets block data.  All members are sized from _MAX_BLOCK_SIZE, so the
 * union is large enough for the largest block size supported.
 */
union fsdata_u {
char b__data[_MAX_BLOCK_SIZE]; /* ordinary user data (raw bytes) */
/* indirect block: an array of block numbers */
block_t b__ind[_MAX_BLOCK_SIZE/sizeof(block_t)];
/* bit map block: an array of bitmap chunks */
bitchunk_t b__bitmap[FS_BITMAP_CHUNKS(_MAX_BLOCK_SIZE)];
};
/* A block is free if b_dev == NO_DEV. */
/* These defs make it possible to write bp->b_data instead of
 * bp->bp->b__data.  Note that each macro expands to "bp->b__xxx", so an
 * expression such as x->b_data becomes x->bp->b__data: the buffer header
 * must have a member named 'bp' pointing at the union above.
 */
#define b_data bp->b__data
#define b_ind bp->b__ind
/* NOTE(review): union fsdata_u as declared in this header has no b__ino
 * member, so b_ino cannot be used as is -- confirm whether it is a leftover.
 */
#define b_ino bp->b__ino
#define b_bitmap bp->b__bitmap
/* Hash function for the buffer hash table: block number modulo pool size. */
#define BUFHASH(b) ((b) % nr_bufs)
EXTERN struct buf *front; /* points to least recently used free block */
EXTERN struct buf *rear; /* points to most recently used free block */
EXTERN unsigned int bufs_in_use; /* # bufs currently in use (not on free list)*/
/* When a block is released, the type of usage is passed to put_block().
 * WRITE_IMMED and ONE_SHOT are flag bits (octal) that put_block() tests with
 * a bitwise AND; the remaining values name the kind of block being released.
 */
#define WRITE_IMMED 0100 /* block should be written to disk now */
#define ONE_SHOT 0200 /* set if block not likely to be needed soon */
#define INODE_BLOCK 0 /* inode block */
#define DIRECTORY_BLOCK 1 /* directory block */
#define INDIRECT_BLOCK 2 /* pointer block */
#define MAP_BLOCK 3 /* bit map */
#define FULL_DATA_BLOCK 5 /* data, fully used */
#define PARTIAL_DATA_BLOCK 6 /* data, partly used*/
#endif /* EXT2_BUF_H */

562
servers/ext2/cache.c Normal file
View file

@ -0,0 +1,562 @@
/* The file system maintains a buffer cache to reduce the number of disk
* accesses needed. Whenever a read or write to the disk is done, a check is
* first made to see if the block is in the cache. This file manages the
* cache.
*
* The entry points into this file are:
* get_block: request to fetch a block for reading or writing from cache
* put_block: return a block previously requested with get_block
* invalidate: remove all the cache blocks on some device
*
* Private functions:
* rw_block: read or write a block from the disk itself
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <minix/u64.h>
#include <stdlib.h>
#include <assert.h>
#include "buf.h"
#include "super.h"
#include "inode.h"
FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
FORWARD _PROTOTYPE( void rw_block, (struct buf *, int) );
PRIVATE int vmcache_avail = -1; /* 0 if not available, >0 if available. */
/*===========================================================================*
* get_block *
*===========================================================================*/
/* Return a buffer holding block 'block' of device 'dev', with its use count
 * incremented.  The buffer comes from the hash chain when the block is
 * already cached; otherwise the buffer at the front of the LRU chain is
 * recycled and, depending on 'only_search', filled from the VM second-level
 * cache or from disk.  The caller releases it with put_block().
 */
PUBLIC struct buf *get_block(
register dev_t dev, /* on which device is the block? */
register block_t block, /* which block is wanted? */
int only_search /* if NO_READ, don't read, else act normal */
)
{
/* Check to see if the requested block is in the block cache. If so, return
 * a pointer to it. If not, evict some other block and fetch it (unless
 * 'only_search' is 1). All the blocks in the cache that are not in use
 * are linked together in a chain, with 'front' pointing to the least recently
 * used block and 'rear' to the most recently used block. If 'only_search' is
 * 1, the block being requested will be overwritten in its entirety, so it is
 * only necessary to see if it is in the cache; if it is not, any free buffer
 * will do. It is not necessary to actually read the block in from disk.
 * If 'only_search' is PREFETCH, the block need not be read from the disk,
 * and the device is not to be marked on the block, so callers can tell if
 * the block returned is valid.
 * In addition to the LRU chain, there is also a hash chain to link together
 * blocks whose block numbers end with the same bit strings, for fast lookup.
 */
int b;
/* NOTE(review): bp and prev_ptr are static, so their values persist between
 * calls; nothing in this function relies on that -- confirm the intent.
 */
static struct buf *bp, *prev_ptr;
/* yieldid: id of the block being evicted (offered to VM); getid: id of the
 * block being requested (asked from VM).
 */
u64_t yieldid = VM_BLOCKID_NONE, getid = make64(dev, block);
int vmcache = 0;
assert(buf_hash);
assert(buf);
assert(nr_bufs > 0);
if(vmcache_avail < 0) {
/* Test once for the availability of the vm yield block feature. */
if(vm_forgetblock(VM_BLOCKID_NONE) == ENOSYS) {
vmcache_avail = 0;
} else {
vmcache_avail = 1;
}
}
/* use vmcache if it's available, and allowed, and we're not doing
 * i/o on a ram disk device.
 */
if(vmcache_avail && may_use_vmcache && major(dev) != MEMORY_MAJOR)
vmcache = 1;
ASSERT(fs_block_size > 0);
/* Search the hash chain for (dev, block). Do_read() can use
 * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
 * someone wants to read from a hole in a file, in which case this search
 * is skipped
 */
if (dev != NO_DEV) {
b = BUFHASH(block);
bp = buf_hash[b];
while (bp != NULL) {
if (bp->b_blocknr == block && bp->b_dev == dev) {
/* Block needed has been found. */
/* An unused buffer sits on the LRU chain; take it off before use. */
if (bp->b_count == 0) rm_lru(bp);
bp->b_count++; /* record that block is in use */
ASSERT(bp->b_bytes == fs_block_size);
ASSERT(bp->b_dev == dev);
ASSERT(bp->b_dev != NO_DEV);
ASSERT(bp->bp);
return(bp);
} else {
/* This block is not the one sought. */
bp = bp->b_hash; /* move to next block on hash chain */
}
}
}
/* Desired block is not on available chain. Take oldest block ('front'). */
/* NOTE(review): the format string has no conversion for the nr_bufs
 * argument, so the count is not printed.
 */
if ((bp = front) == NULL) panic("all buffers in use", nr_bufs);
/* The chosen buffer has no (or too little) memory attached yet: allocate
 * it now, or fall back to any LRU buffer that already has a full block.
 */
if(bp->b_bytes < fs_block_size) {
ASSERT(!bp->bp);
ASSERT(bp->b_bytes == 0);
if(!(bp->bp = alloc_contig( (size_t) fs_block_size, 0, NULL))) {
ext2_debug("ext2: couldn't allocate a new block.\n");
/* Walk the LRU chain for the first buffer that already owns memory. */
for(bp = front;
bp && bp->b_bytes < fs_block_size; bp = bp->b_next)
;
if(!bp) {
panic("no buffer available");
}
} else {
bp->b_bytes = fs_block_size;
}
}
ASSERT(bp);
ASSERT(bp->bp);
ASSERT(bp->b_bytes == fs_block_size);
ASSERT(bp->b_count == 0);
rm_lru(bp);
/* Remove the block that was just taken from its hash chain. */
b = BUFHASH(bp->b_blocknr);
prev_ptr = buf_hash[b];
if (prev_ptr == bp) {
buf_hash[b] = bp->b_hash;
} else {
/* The block just taken is not on the front of its hash chain. */
/* NOTE(review): this dereferences prev_ptr without a NULL check, so it
 * relies on every buffer always being linked on the chain of its block
 * number.
 */
while (prev_ptr->b_hash != NULL)
if (prev_ptr->b_hash == bp) {
prev_ptr->b_hash = bp->b_hash; /* found it */
break;
} else {
prev_ptr = prev_ptr->b_hash; /* keep looking */
}
}
/* If the block taken is dirty, make it clean by writing it to the disk.
 * Avoid hysteresis by flushing all other dirty blocks for the same device.
 */
if (bp->b_dev != NO_DEV) {
if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
/* Are we throwing out a block that contained something?
 * Give it to VM for the second-layer cache.
 */
yieldid = make64(bp->b_dev, bp->b_blocknr);
assert(bp->b_bytes == fs_block_size);
bp->b_dev = NO_DEV;
}
/* Fill in block's parameters and add it to the hash chain where it goes. */
bp->b_dev = dev; /* fill in device number */
bp->b_blocknr = block; /* fill in block number */
bp->b_count++; /* record that block is being used */
b = BUFHASH(bp->b_blocknr);
bp->b_hash = buf_hash[b];
buf_hash[b] = bp; /* add to hash list */
if(dev == NO_DEV) {
/* Nothing to fetch, but the evicted contents may still be yielded to VM. */
if(vmcache && cmp64(yieldid, VM_BLOCKID_NONE) != 0) {
vm_yield_block_get_block(yieldid, VM_BLOCKID_NONE,
bp->bp, fs_block_size);
}
return(bp); /* If the caller wanted a NO_DEV block, work is done. */
}
/* Go get the requested block unless searching or prefetching. */
if(only_search == PREFETCH || only_search == NORMAL) {
/* Block is not found in our cache, but we do want it
 * if it's in the vm cache.
 */
if(vmcache) {
/* If we can satisfy the PREFETCH or NORMAL request
 * from the vm cache, work is done.
 */
if(vm_yield_block_get_block(yieldid, getid,
bp->bp, fs_block_size) == OK) {
return bp;
}
}
}
if(only_search == PREFETCH) {
/* PREFETCH: don't do i/o. */
/* Clearing b_dev marks the returned buffer as not holding valid data. */
bp->b_dev = NO_DEV;
} else if (only_search == NORMAL) {
rw_block(bp, READING);
} else if(only_search == NO_READ) {
/* we want this block, but its contents
 * will be overwritten. VM has to forget
 * about it.
 */
if(vmcache) {
vm_forgetblock(getid);
}
} else
panic("unexpected only_search value: %d", only_search);
assert(bp->bp);
return(bp); /* return the newly acquired block */
}
/*===========================================================================*
* put_block *
*===========================================================================*/
PUBLIC void put_block(
  register struct buf *bp,	/* pointer to the buffer to be released */
  int block_type		/* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
)
{
/* Release one reference to a buffer obtained with get_block().  When the
 * last reference goes away the buffer is linked back into the LRU chain:
 * at the front (evicted first) for RAM-disk blocks and ONE_SHOT releases,
 * at the rear (evicted last) otherwise.  If 'block_type' carries WRITE_IMMED
 * and the buffer is dirty and belongs to a device, it is written out at once.
 * A NULL 'bp' is accepted and ignored for the caller's convenience.
 */
  int evict_soon;		/* non-zero: link at the front of the chain */

  if (bp == NULL) return;

  /* Drop our reference; nothing more to do while others remain. */
  if (--bp->b_count != 0) return;

  bufs_in_use--;		/* one fewer block buffers in use */

  evict_soon = (bp->b_dev == DEV_RAM) || ((block_type & ONE_SHOT) != 0);

  if (!evict_soon) {
	/* Likely to be needed again: becomes the most recently used. */
	bp->b_next = NULL;
	bp->b_prev = rear;
	if (rear != NULL)
		rear->b_next = bp;
	else
		front = bp;	/* LRU chain was empty */
	rear = bp;
  } else {
	/* Unlikely to be needed again: becomes the next eviction victim. */
	bp->b_next = front;
	bp->b_prev = NULL;
	if (front != NULL)
		front->b_prev = bp;
	else
		rear = bp;	/* LRU chain was empty */
	front = bp;
  }

  /* Blocks whose loss would hurt file system integrity are flushed now. */
  if ((block_type & WRITE_IMMED) == 0) return;
  if (bp->b_dirt == DIRTY && bp->b_dev != NO_DEV)
	rw_block(bp, WRITING);
}
/*===========================================================================*
* rw_block *
*===========================================================================*/
PRIVATE void rw_block(
  register struct buf *bp,	/* buffer pointer */
  int rw_flag			/* READING or WRITING */
)
{
/* Transfer one block between its buffer and the disk.  Failures are not
 * returned to the caller: an I/O error is printed, a short transfer is
 * turned into END_OF_FILE, and in both cases the buffer is invalidated
 * (b_dev = NO_DEV).  For reads the error code is also left in 'rdwt_err'.
 * The buffer is always marked CLEAN on the way out, even when bp->b_dev was
 * NO_DEV and no transfer took place.
 */
  dev_t dev;
  u64_t pos;
  int op;
  int r;
  int failed;

  dev = bp->b_dev;
  if (dev == NO_DEV) {
	bp->b_dirt = CLEAN;
	return;
  }

  pos = mul64u(bp->b_blocknr, fs_block_size);
  if (rw_flag == READING)
	op = MFS_DEV_READ;
  else
	op = MFS_DEV_WRITE;

  r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size);

  failed = 0;
  if (r < 0) {
	printf("Ext2(%d) I/O error on device %d/%d, block %lu\n",
		SELF_E, major(dev), minor(dev), bp->b_blocknr);
	failed = 1;
  } else if ((unsigned) r != fs_block_size) {
	/* Fewer bytes than a full block were transferred. */
	r = END_OF_FILE;
	failed = 1;
  }

  if (failed) {
	bp->b_dev = NO_DEV;	/* invalidate block */
	/* Report read errors to interested parties. */
	if (rw_flag == READING)
		rdwt_err = r;
  }

  bp->b_dirt = CLEAN;
}
/*===========================================================================*
* invalidate *
*===========================================================================*/
PUBLIC void invalidate(
  dev_t device			/* device whose blocks are to be purged */
)
{
/* Forget every cached block of 'device' by marking its buffer as belonging
 * to no device, then tell VM to drop its cached blocks as well.
 */
  register struct buf *bp = &buf[0];

  while (bp < &buf[nr_bufs]) {
	if (bp->b_dev == device)
		bp->b_dev = NO_DEV;
	bp++;
  }

  vm_forgetblocks();
}
/*===========================================================================*
* flushall *
*===========================================================================*/
PUBLIC void flushall(
  dev_t dev			/* device to flush */
)
{
/* Write out every dirty buffer that belongs to 'dev'.  The buffers are
 * collected into a scratch list, which is kept between calls and only
 * reallocated when the size of the buffer pool has changed, and the list is
 * handed to rw_scattered() for writing.
 */
  static struct buf **dirty;	/* static so it isn't on stack */
  static int unsigned dirtylistsize = 0;
  register struct buf *bp;
  int ndirty = 0;

  /* (Re)size the scratch list to match the current pool. */
  if (dirtylistsize != nr_bufs) {
	if (dirtylistsize > 0) {
		assert(dirty != NULL);
		free(dirty);
	}
	dirty = malloc(sizeof(dirty[0])*nr_bufs);
	if (!dirty)
		panic("couldn't allocate dirty buf list");
	dirtylistsize = nr_bufs;
  }

  /* Gather the dirty buffers of this device. */
  bp = &buf[0];
  while (bp < &buf[nr_bufs]) {
	if (bp->b_dirt == DIRTY && bp->b_dev == dev) {
		dirty[ndirty] = bp;
		ndirty++;
	}
	bp++;
  }

  rw_scattered(dev, dirty, ndirty, WRITING);
}
/*===========================================================================*
* rw_scattered *
*===========================================================================*/
/* Transfer a set of buffers to or from 'dev' using vectored I/O.  The
 * buffers are first sorted by block number; each request then covers a run
 * of consecutive blocks of at most NR_IOREQS elements.
 */
PUBLIC void rw_scattered(
dev_t dev, /* major-minor device number */
struct buf **bufq, /* pointer to array of buffers */
int bufqsize, /* number of buffers */
int rw_flag /* READING or WRITING */
)
{
/* Read or write scattered data from a device. */
register struct buf *bp;
int gap;
register int i;
register iovec_t *iop;
static iovec_t *iovec = NULL;
int j, r;
/* STATICINIT is defined elsewhere; presumably it allocates the NR_IOREQS
 * element iovec array on first use -- TODO confirm.
 */
STATICINIT(iovec, NR_IOREQS);
/* (Shell) sort buffers on b_blocknr. */
/* Pick the first gap of the sequence 1, 4, 13, ... that exceeds bufqsize. */
gap = 1;
do
gap = 3 * gap + 1;
while (gap <= bufqsize);
while (gap != 1) {
gap /= 3;
for (j = gap; j < bufqsize; j++) {
for (i = j - gap;
i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
i -= gap) {
bp = bufq[i];
bufq[i] = bufq[i + gap];
bufq[i + gap] = bp;
}
}
}
/* Set up I/O vector and do I/O. The result of dev_io is OK if everything
 * went fine, otherwise the error code for the first failed transfer.
 */
while (bufqsize > 0) {
/* Build one request: stop at the first buffer whose block number does not
 * directly follow the previous one.  On exit j is the number of elements.
 */
for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
bp = bufq[j];
if (bp->b_blocknr != (block_t) bufq[0]->b_blocknr + j) break;
iop->iov_addr = (vir_bytes) bp->b_data;
iop->iov_size = (vir_bytes) fs_block_size;
}
r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER,
dev, SELF_E, iovec,
mul64u(bufq[0]->b_blocknr, fs_block_size), j);
/* Harvest the results. Dev_io reports the first error it may have
 * encountered, but we only care if it's the first block that failed.
 */
/* A non-zero iov_size is taken to mean the element was not (fully)
 * transferred; on exit i is the number of elements that completed.
 */
for (i = 0, iop = iovec; i < j; i++, iop++) {
bp = bufq[i];
if (iop->iov_size != 0) {
/* Transfer failed. An error? Do we care? */
if (r != OK && i == 0) {
printf(
"fs: I/O error on device %d/%d, block %lu\n",
major(dev), minor(dev), bp->b_blocknr);
bp->b_dev = NO_DEV; /* invalidate block */
vm_forgetblocks();
}
break;
}
if (rw_flag == READING) {
bp->b_dev = dev; /* validate block */
put_block(bp, PARTIAL_DATA_BLOCK);
} else {
bp->b_dirt = CLEAN;
}
}
/* Skip past the buffers that were handled in this round. */
bufq += i;
bufqsize -= i;
if (rw_flag == READING) {
/* Don't bother reading more than the device is willing to
 * give at this time. Don't forget to release those extras.
 */
while (bufqsize > 0) {
put_block(*bufq++, PARTIAL_DATA_BLOCK);
bufqsize--;
}
}
if (rw_flag == WRITING && i == 0) {
/* We're not making progress, this means we might keep
 * looping. Buffers remain dirty if un-written. Buffers are
 * lost if invalidate()d or LRU-removed while dirty. This
 * is better than keeping unwritable blocks around forever..
 */
break;
}
}
}
/*===========================================================================*
* rm_lru *
*===========================================================================*/
PRIVATE void rm_lru(
  struct buf *bp
)
{
/* Unlink 'bp' from the doubly linked LRU chain, fixing up 'front' and 'rear'
 * when it sat at either end, and count it as a buffer in use.
 */
  struct buf *succ = bp->b_next;	/* successor on LRU chain */
  struct buf *pred = bp->b_prev;	/* predecessor on LRU chain */

  bufs_in_use++;

  if (pred == NULL)
	front = succ;		/* this block was at front of chain */
  else
	pred->b_next = succ;

  if (succ == NULL)
	rear = pred;		/* this block was at rear of chain */
  else
	succ->b_prev = pred;
}
/*===========================================================================*
* set_blocksize *
*===========================================================================*/
PUBLIC void set_blocksize(unsigned int blocksize)
{
/* Switch the server to a new block size.  This is only legal while no buffer
 * and no in-core inode is in use (otherwise we panic); the buffer pool is
 * rebuilt with its current number of buffers before the new size is recorded.
 */
  struct buf *bp;
  struct inode *rip;

  ASSERT(blocksize > 0);

  bp = &buf[0];
  while (bp < &buf[nr_bufs]) {
	if (bp->b_count != 0)
		panic("change blocksize with buffer in use");
	bp++;
  }

  rip = &inode[0];
  while (rip < &inode[NR_INODES]) {
	if (rip->i_count > 0)
		panic("change blocksize with inode in use");
	rip++;
  }

  buf_pool(nr_bufs);
  fs_block_size = blocksize;
}
/*===========================================================================*
* buf_pool *
*===========================================================================*/
PUBLIC void buf_pool(int new_nr_bufs)
{
/* (Re)create the buffer pool with 'new_nr_bufs' buffers.
 *
 * An existing pool is synced to disk and its per-buffer memory released
 * first.  Afterwards every buffer is unnamed (NO_DEV / NO_BLOCK), owns no
 * memory yet, and all buffers are linked both into one LRU chain (in array
 * order) and into the single hash chain starting at buf_hash[0].
 */
  register struct buf *bp;

  assert(new_nr_bufs > 0);

  /* Tear down the old pool, if there is one. */
  if (nr_bufs > 0) {
	assert(buf);
	(void) fs_sync();
	bp = &buf[0];
	while (bp < &buf[nr_bufs]) {
		if (bp->bp) {
			assert(bp->b_bytes > 0);
			free_contig(bp->bp, bp->b_bytes);
		}
		bp++;
	}
  }

  /* Fresh, zeroed buffer headers. */
  if (buf != NULL)
	free(buf);
  buf = calloc(sizeof(buf[0]), new_nr_bufs);
  if (!buf)
	panic("couldn't allocate buf list (%d)", new_nr_bufs);

  /* Fresh, zeroed hash table. */
  if (buf_hash != NULL)
	free(buf_hash);
  buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs);
  if (!buf_hash)
	panic("couldn't allocate buf hash list (%d)", new_nr_bufs);

  nr_bufs = new_nr_bufs;
  bufs_in_use = 0;
  front = &buf[0];
  rear = &buf[nr_bufs - 1];

  /* Initialise each header and thread it onto the LRU and hash chains. */
  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
	bp->b_blocknr = NO_BLOCK;
	bp->b_dev = NO_DEV;
	bp->bp = NULL;
	bp->b_bytes = 0;
	bp->b_prev = (bp == front) ? NULL : bp - 1;
	bp->b_next = (bp == rear) ? NULL : bp + 1;
	bp->b_hash = bp->b_next;
  }
  buf_hash[0] = front;

  vm_forgetblocks();
}

185
servers/ext2/const.h Normal file
View file

@ -0,0 +1,185 @@
#ifndef EXT2_CONST_H
#define EXT2_CONST_H
/* Table sizes */
#define NR_INODES 256 /* # slots in "in core" inode table */
#define GETDENTS_BUFSIZ 257
#define INODE_HASH_LOG2 7 /* 2 based logarithm of the inode hash size */
#define INODE_HASH_SIZE ((unsigned long)1<<INODE_HASH_LOG2)
#define INODE_HASH_MASK (((unsigned long)1<<INODE_HASH_LOG2)-1)
/* The type of sizeof may be (unsigned) long. Use the following macro for
 * taking the sizes of small objects so that there are no surprises like
 * (small) long constants being passed to routines expecting an int.
 */
#define usizeof(t) ((unsigned) sizeof(t))
#define SUPER_MAGIC 0xEF53 /* magic number contained in super-block */
#define EXT2_NAME_MAX 255
/* Miscellaneous constants */
#define SU_UID ((uid_t) 0) /* super_user's uid_t */
/* Values for the 'only_search' argument of get_block() */
#define NORMAL 0 /* forces get_block to do disk read */
#define NO_READ 1 /* prevents get_block from doing disk read */
#define PREFETCH 2 /* tells get_block not to read or mark dev */
#define NO_BIT ((bit_t) 0) /* returned by alloc_bit() to signal failure */
#define LOOK_UP 0 /* tells search_dir to lookup string */
#define ENTER 1 /* tells search_dir to make dir entry */
#define DELETE 2 /* tells search_dir to delete entry */
#define IS_EMPTY 3 /* tells search_dir to ret. OK or ENOTEMPTY */
/* write_map() args */
#define WMAP_FREE (1 << 0)
#define IGN_PERM 0
#define CHK_PERM 1
#define CLEAN 0 /* disk and memory copies identical */
#define DIRTY 1 /* disk and memory copies differ */
#define ATIME 002 /* set if atime field needs updating */
#define CTIME 004 /* set if ctime field needs updating */
#define MTIME 010 /* set if mtime field needs updating */
#define BYTE_SWAP 0 /* tells conv2/conv4 to swap bytes */
#define END_OF_FILE (-104) /* eof detected */
#define SUPER_BLOCK_BYTES (1024) /* bytes offset */
#define ROOT_INODE ((ino_t) 2) /* inode number for root directory */
#define BOOT_BLOCK ((block_t) 0) /* block number of boot block */
#define START_BLOCK ((block_t) 2) /* first block of FS (not counting SB) */
#define BLOCK_ADDRESS_BYTES 4 /* bytes per address */
#define SUPER_SIZE usizeof (struct super_block) /* sb size in RAM */
#define SUPER_SIZE_D (1024) /* max size of superblock stored on disk */
/* Directories related macroses */
/* Directory entries are padded to a multiple of this many bytes. It must be
 * a power of two, because DIR_ENTRY_ACTUAL_SIZE() rounds up with a bit mask.
 */
#define DIR_ENTRY_ALIGN    4
/* ino + rec_len + name_len + file_type, doesn't include name and padding */
#define MIN_DIR_ENTRY_SIZE 8
/* Fixed header plus the name itself, without any trailing padding */
#define DIR_ENTRY_CONTENTS_SIZE(d) (MIN_DIR_ENTRY_SIZE + (d)->d_name_len)
/* Size with padding: the contents size rounded up to DIR_ENTRY_ALIGN.
 * The mask is derived from DIR_ENTRY_ALIGN and 'd' is expanded only once.
 */
#define DIR_ENTRY_ACTUAL_SIZE(d) \
	((DIR_ENTRY_CONTENTS_SIZE(d) + (DIR_ENTRY_ALIGN - 1)) & \
	 ~(DIR_ENTRY_ALIGN - 1))
/* How many bytes can be taken from the end of dentry */
#define DIR_ENTRY_SHRINK(d) (conv2(le_CPU, (d)->d_rec_len) \
- DIR_ENTRY_ACTUAL_SIZE(d))
/* Dentry can have padding, which can be used to enlarge namelen */
#define DIR_ENTRY_MAX_NAME_LEN(d) (conv2(le_CPU, (d)->d_rec_len) \
- MIN_DIR_ENTRY_SIZE)
/* Constants relative to the data blocks */
/* When changing EXT2_NDIR_BLOCKS, also update ext2_max_size()! */
#define EXT2_NDIR_BLOCKS 12
#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS
#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1)
#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1)
#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
#define FS_BITMAP_CHUNKS(b) ((b)/usizeof (bitchunk_t))/* # map chunks/blk */
#define FS_BITCHUNK_BITS (usizeof(bitchunk_t) * CHAR_BIT)
#define FS_BITS_PER_BLOCK(b) (FS_BITMAP_CHUNKS(b) * FS_BITCHUNK_BITS)
/* Inodes */
/* The following four macros were taken from Linux's ext2_fs.h */
#define EXT2_GOOD_OLD_INODE_SIZE 128
#define EXT2_GOOD_OLD_FIRST_INO 11
#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \
EXT2_GOOD_OLD_INODE_SIZE : \
(s)->s_inode_size)
#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \
EXT2_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
/* Maximum size of a fast symlink including trailing '\0' */
#define MAX_FAST_SYMLINK_LENGTH \
( sizeof(((d_inode *)0)->i_block[0]) * EXT2_N_BLOCKS )
#define NUL(str,l,m) mfs_nul_f(__FILE__,__LINE__,(str), (l), (m))
/* Args to dev_bio/dev_io */
#define MFS_DEV_READ 10001
#define MFS_DEV_WRITE 10002
#define MFS_DEV_SCATTER 10003
#define MFS_DEV_GATHER 10004
/* FS states */
#define EXT2_VALID_FS 0x0001 /* Cleanly unmounted */
#define EXT2_ERROR_FS 0x0002 /* Errors detected */
#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */
#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
/* ext2 features; names are shortened (the EXT2_ prefix is dropped) */
#define COMPAT_DIR_PREALLOC 0x0001
#define COMPAT_IMAGIC_INODES 0x0002
#define COMPAT_HAS_JOURNAL 0x0004
#define COMPAT_EXT_ATTR 0x0008
#define COMPAT_RESIZE_INO 0x0010
#define COMPAT_DIR_INDEX 0x0020
#define COMPAT_ANY 0xffffffff
#define RO_COMPAT_SPARSE_SUPER 0x0001
#define RO_COMPAT_LARGE_FILE 0x0002
#define RO_COMPAT_BTREE_DIR 0x0004
#define RO_COMPAT_ANY 0xffffffff
#define INCOMPAT_COMPRESSION 0x0001
#define INCOMPAT_FILETYPE 0x0002
#define INCOMPAT_RECOVER 0x0004
#define INCOMPAT_JOURNAL_DEV 0x0008
#define INCOMPAT_META_BG 0x0010
#define INCOMPAT_ANY 0xffffffff
/* What do we support? */
#define SUPPORTED_INCOMPAT_FEATURES (INCOMPAT_FILETYPE)
#define SUPPORTED_RO_COMPAT_FEATURES (RO_COMPAT_SPARSE_SUPER | \
RO_COMPAT_LARGE_FILE)
/* Ext2 directory file types. Only the low 3 bits are used.
* The other bits are reserved for now.
*/
#define EXT2_FT_UNKNOWN 0
#define EXT2_FT_REG_FILE 1
#define EXT2_FT_DIR 2
#define EXT2_FT_CHRDEV 3
#define EXT2_FT_BLKDEV 4
#define EXT2_FT_FIFO 5
#define EXT2_FT_SOCK 6
#define EXT2_FT_SYMLINK 7
#define EXT2_FT_MAX 8
#define HAS_COMPAT_FEATURE(sp, mask) \
( (sp)->s_feature_compat & (mask) )
#define HAS_RO_COMPAT_FEATURE(sp, mask) \
( (sp)->s_feature_ro_compat & (mask) )
#define HAS_INCOMPAT_FEATURE(sp, mask) \
( (sp)->s_feature_incompat & (mask) )
/* hash-indexed directory */
#define EXT2_INDEX_FL 0x00001000
/* Top of directory hierarchies*/
#define EXT2_TOPDIR_FL 0x00020000
#define EXT2_PREALLOC_BLOCKS 8
#endif /* EXT2_CONST_H */

359
servers/ext2/device.c Normal file
View file

@ -0,0 +1,359 @@
#include "fs.h"
#include <minix/com.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>
#include <minix/u64.h>
#include <string.h>
#include "inode.h"
#include "super.h"
#include "const.h"
#include "drivers.h"
#include <minix/vfsif.h>
FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t driver,
cp_grant_id_t *gid, int *op, cp_grant_id_t *gids, endpoint_t *io_ept,
void **buffer, int *vec_grants, size_t bytes));
FORWARD _PROTOTYPE( void safe_io_cleanup, (cp_grant_id_t, cp_grant_id_t *,
int));
FORWARD _PROTOTYPE( int gen_opcl, (endpoint_t driver_e, int op,
dev_t dev, endpoint_t proc_e, int flags) );
FORWARD _PROTOTYPE( int gen_io, (endpoint_t task_nr, message *mess_ptr) );
/*===========================================================================*
* fs_new_driver *
*===========================================================================*/
PUBLIC int fs_new_driver(void)
{
/* New driver endpoint for this device */
dev_t dev;
dev = (dev_t) fs_m_in.REQ_DEV;
driver_endpoints[major(dev)].driver_e = (endpoint_t) fs_m_in.REQ_DRIVER_E;
return(OK);
}
/*===========================================================================*
* safe_io_conversion *
*===========================================================================*/
PRIVATE int safe_io_conversion(driver, gid, op, gids, io_ept, buffer,
	vec_grants, bytes)
endpoint_t driver;		/* driver the grants are made for */
cp_grant_id_t *gid;		/* out: grant for the buffer or the i/o vector */
int *op;			/* in: MFS_DEV_* opcode; out: DEV_*_S opcode */
cp_grant_id_t *gids;		/* out: one grant per i/o vector element */
endpoint_t *io_ept;		/* out: set to SELF_E once converted */
void **buffer;			/* in: caller's buffer/vector; out: for vector
				 * ops, the grant-based replacement vector */
int *vec_grants;		/* out: number of entries filled in 'gids' */
size_t bytes;			/* byte count for READ/WRITE, number of
				 * iovec_t elements for GATHER/SCATTER */
{
/* Convert an MFS_DEV_* request into its grant-based ("safe") DEV_*_S
 * equivalent. For plain reads and writes one grant on the buffer is created.
 * For vectored requests a grant on a private copy of the vector is created
 * plus one grant per element, and *buffer is redirected to that copy.
 * Any grant failure or unknown opcode panics. Returns 1 when the request was
 * converted, 0 otherwise.
 */
  unsigned int j;
  int access;
  iovec_t *v;
  static iovec_t *new_iovec;

  STATICINIT(new_iovec, NR_IOREQS);

  /* Number of grants allocated in vector I/O. */
  *vec_grants = 0;

  /* Driver can handle it - change request to a safe one. */
  *gid = GRANT_INVALID;

  switch(*op) {
	case MFS_DEV_READ:
	case MFS_DEV_WRITE:
		/* Change to safe op. */
		*op = *op == MFS_DEV_READ ? DEV_READ_S : DEV_WRITE_S;
		*gid = cpf_grant_direct(driver, (vir_bytes) *buffer, bytes,
			*op == DEV_READ_S ? CPF_WRITE : CPF_READ);
		if(*gid == GRANT_INVALID) {
			panic("cpf_grant_direct of buffer failed");
		}
		break;
	case MFS_DEV_GATHER:
	case MFS_DEV_SCATTER:
		/* Reject an oversized vector before granting anything; the
		 * original only noticed inside the loop, after a region of
		 * 'bytes' elements had already been granted to the driver.
		 */
		if(bytes > NR_IOREQS)
			panic("vec too big: %u", (unsigned) bytes);

		/* Change to safe op. */
		*op = *op == MFS_DEV_GATHER ? DEV_GATHER_S : DEV_SCATTER_S;

		/* Grant access to my new i/o vector. */
		*gid = cpf_grant_direct(driver, (vir_bytes) new_iovec,
			bytes * sizeof(iovec_t), CPF_READ|CPF_WRITE);
		if(*gid == GRANT_INVALID) {
			panic("cpf_grant_direct of vector failed");
		}

		v = (iovec_t *) *buffer;
		/* The driver writes into our buffers on a gather and reads
		 * from them on a scatter.
		 */
		access = (*op == DEV_GATHER_S) ? CPF_WRITE : CPF_READ;

		/* Grant access to i/o buffers. */
		for(j = 0; j < bytes; j++) {
			new_iovec[j].iov_addr = gids[j] =
				cpf_grant_direct(driver,
					(vir_bytes) v[j].iov_addr,
					(size_t) v[j].iov_size, access);
			if(!GRANT_VALID(gids[j])) {
				panic("ext2: grant to iovec buf failed");
			}
			new_iovec[j].iov_size = v[j].iov_size;
			(*vec_grants)++;
		}

		/* Set user's vector to the new one. */
		*buffer = new_iovec;
		break;
	default:
		panic("Illegal operation %d\n", *op);
		break;
  }

  /* If we have converted to a safe operation, I/O
   * endpoint becomes FS if it wasn't already.
   */
  if(GRANT_VALID(*gid)) {
	*io_ept = SELF_E;
	return 1;
  }

  /* Not converted to a safe operation (because there is no
   * copying involved in this operation).
   */
  return 0;
}
/*===========================================================================*
* safe_io_cleanup *
*===========================================================================*/
PRIVATE void safe_io_cleanup(gid, gids, gids_size)
cp_grant_id_t gid;		/* main grant to revoke */
cp_grant_id_t *gids;		/* per-element grants to revoke */
int gids_size;			/* how many entries of 'gids' are in use */
{
/* Undo safe_io_conversion(): revoke the main grant first, then the first
 * 'gids_size' element grants in order. Results of cpf_revoke() are ignored.
 */
  int k = 0;

  (void) cpf_revoke(gid);

  while (k < gids_size) {
	(void) cpf_revoke(gids[k]);
	k++;
  }
}
/*===========================================================================*
* block_dev_io *
*===========================================================================*/
PUBLIC int block_dev_io(
int op, /* MFS_DEV_READ, MFS_DEV_WRITE, etc. */
dev_t dev, /* major-minor device number */
endpoint_t proc_e, /* in whose address space is buf? */
void *buffer, /* virtual address of the buffer */
u64_t pos, /* byte position */
size_t bytes /* how many bytes to transfer; for vectored requests the
	      * copy-back below treats it as a count of iovec_t entries */
)
{
/* Read or write from a device. The parameter 'dev' tells which one.
 *
 * The request is converted to its grant-based form, sent to the driver
 * registered for major(dev) and the grants are revoked again afterwards.
 * Returns EDEADEPT when no driver is registered, the sendrec() error when the
 * driver is dead or the call was ELOCKED, and otherwise the REP_STATUS field
 * of the driver's reply. Any other sendrec() failure, a non-self 'proc_e' or
 * a SUSPEND reply panics.
 */
int r, safe;
message m;
cp_grant_id_t gid = GRANT_INVALID;
int vec_grants;
int op_used;
void *buf_used;
static cp_grant_id_t *gids;
endpoint_t driver_e;
STATICINIT(gids, NR_IOREQS);
/* Determine driver endpoint for this device */
driver_e = driver_endpoints[major(dev)].driver_e;
/* See if driver is roughly valid. */
if (driver_e == NONE) {
printf("ext2(%d) block_dev_io: no driver for dev %x\n", SELF_E, dev);
return(EDEADEPT);
}
/* The io vector copying relies on this I/O being for FS itself. */
if(proc_e != SELF_E) {
printf("ext2(%d) doing block_dev_io for non-self %d\n", SELF_E, proc_e);
panic("doing block_dev_io for non-self: %d", proc_e);
}
/* By default, these are right. */
m.IO_ENDPT = proc_e;
m.ADDRESS = buffer;
buf_used = buffer;
/* Convert parameters to 'safe mode'. */
op_used = op;
safe = safe_io_conversion(driver_e, &gid, &op_used, gids, &m.IO_ENDPT,
&buf_used, &vec_grants, bytes);
/* Set up rest of the message. */
/* NOTE(review): IO_GRANT is assigned after ADDRESS; presumably both name the
 * same message field so the grant replaces the raw address - confirm.
 */
if (safe) m.IO_GRANT = (char *) gid;
m.m_type = op_used;
m.DEVICE = minor(dev);
m.POSITION = ex64lo(pos);
m.COUNT = bytes;
m.HIGHPOS = ex64hi(pos);
/* Call the task. */
r = sendrec(driver_e, &m);
/* A reply status of ERESTART is handled like a dead driver endpoint. */
if(r == OK && m.REP_STATUS == ERESTART) r = EDEADEPT;
/* As block I/O never SUSPENDs, safe cleanup must be done whether
 * the I/O succeeded or not. */
if (safe) safe_io_cleanup(gid, gids, vec_grants);
/* RECOVERY:
 * - send back dead driver number
 * - VFS unmaps it, waits for new driver
 * - VFS sends the new driver endp for the FS proc and the request again
 */
if (r != OK) {
if (r == EDEADSRCDST || r == EDEADEPT) {
printf("ext2(%d) dead driver %d\n", SELF_E, driver_e);
/* Forget the endpoint so later calls fail early with EDEADEPT. */
driver_endpoints[major(dev)].driver_e = NONE;
return(r);
} else if (r == ELOCKED) {
printf("ext2(%d) ELOCKED talking to %d\n", SELF_E, driver_e);
return(r);
} else
panic("call_task: can't send/receive: %d", r);
} else {
/* Did the process we did the sendrec() for get a result? */
if (m.REP_ENDPT != proc_e) {
printf("ext2(%d) strange device reply from %d, type = %d, proc "
"= %d (not %d) (2) ignored\n", SELF_E, m.m_source,
m.m_type, proc_e, m.REP_ENDPT);
/* NOTE(review): this EIO only suppresses the copy-back below; the
 * function still returns m.REP_STATUS, not r.
 */
r = EIO;
}
}
/* Task has completed. See if call completed. */
if (m.REP_STATUS == SUSPEND) {
panic("ext2 block_dev_io: driver returned SUSPEND");
}
/* safe_io_conversion() substituted its own vector for vectored requests;
 * copy that vector back over the caller's one.
 */
if(buffer != buf_used && r == OK) {
memcpy(buffer, buf_used, bytes * sizeof(iovec_t));
}
return(m.REP_STATUS);
}
/*===========================================================================*
* dev_open *
*===========================================================================*/
PUBLIC int dev_open(
  endpoint_t driver_e,		/* driver to send the request to */
  dev_t dev,			/* device to open */
  endpoint_t proc_e,		/* process to open for */
  int flags			/* mode bits and flags */
)
{
/* Send a DEV_OPEN request for 'dev' to its driver and return the driver's
 * reply status, or EIO when the major device number is out of range.
 * (This is the only routine that must check the device number for being in
 * range. All others can trust this check.)
 */
  int major_nr = major(dev);
  int status;

  if (major_nr >= NR_DEVICES) {
	printf("Major device number %d not in range\n", major(dev));
	return(EIO);
  }

  status = gen_opcl(driver_e, DEV_OPEN, dev, proc_e, flags);

  /* An open may not be suspended. */
  if (status == SUSPEND)
	panic("suspend on open from");

  return(status);
}
/*===========================================================================*
* dev_close *
*===========================================================================*/
PUBLIC void dev_close(
  endpoint_t driver_e,		/* driver to send the request to */
  dev_t dev			/* device to close */
)
{
/* Send a DEV_CLOSE request for 'dev' to its driver. Endpoint and flags are
 * both passed as 0 and the reply status is deliberately ignored.
 */
  endpoint_t no_proc = 0;
  int no_flags = 0;

  (void) gen_opcl(driver_e, DEV_CLOSE, dev, no_proc, no_flags);
}
/*===========================================================================*
* gen_opcl *
*===========================================================================*/
PRIVATE int gen_opcl(
  endpoint_t driver_e,		/* driver to send the request to */
  int op,			/* operation, DEV_OPEN or DEV_CLOSE */
  dev_t dev,			/* device to open or close */
  endpoint_t proc_e,		/* process to open/close for */
  int flags			/* mode bits and flags */
)
{
/* Build an open/close request for the minor device of 'dev', hand it to
 * gen_io() and return the REP_STATUS field of the resulting message.
 * Used by dev_open() and dev_close().
 *
 * NOTE(review): the value returned by gen_io() is discarded, so REP_STATUS is
 * read even when gen_io() reported ELOCKED or EIO - confirm this is intended.
 */
  message request;

  request.m_type = op;
  request.DEVICE = minor(dev);
  request.IO_ENDPT = proc_e;
  request.COUNT = flags;

  /* Call the task. */
  (void) gen_io(driver_e, &request);

  return(request.REP_STATUS);
}
/*===========================================================================*
* gen_io *
*===========================================================================*/
PRIVATE int gen_io(
  endpoint_t task_nr,		/* which task to call */
  message *mess_ptr		/* pointer to message for task */
)
{
/* Send the request in '*mess_ptr' to 'task_nr' and wait for the reply, which
 * is stored in the same message. Returns OK on a matching reply, ELOCKED
 * when sendrec() reported that, and EIO when the reply names a different
 * endpoint than the request did. A dead driver or any other sendrec()
 * failure panics.
 */
  int status;
  int requester_e = mess_ptr->IO_ENDPT;	/* remember it before the reply
					 * is written into the message */

  status = sendrec(task_nr, mess_ptr);
  if (status == OK && mess_ptr->REP_STATUS == ERESTART)
	status = EDEADEPT;

  if (status != OK) {
	if (status == EDEADSRCDST || status == EDEADEPT) {
		printf("fs: dead driver %d\n", task_nr);
		panic("should handle crashed drivers");
		return(status);
	} else if (status == ELOCKED) {
		printf("fs: ELOCKED talking to %d\n", task_nr);
		return(status);
	} else {
		panic("call_task: can't send/receive: %d", status);
	}
  }

  /* Did the process we did the sendrec() for get a result? */
  if (mess_ptr->REP_ENDPT == requester_e)
	return(OK);

  printf("fs: strange device reply from %d, type = %d, proc = %d (not "
	"%d) (2) ignored\n", mess_ptr->m_source, mess_ptr->m_type,
	requester_e,
	mess_ptr->REP_ENDPT);
  return(EIO);
}

11
servers/ext2/drivers.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef EXT2_DRIVERS_H
#define EXT2_DRIVERS_H
/* Driver endpoints for major devices. Only the block devices are mapped
 * here; this is a subset of the mapping kept by VFS.
 * The table has NR_DEVICES entries and device.c indexes it with major(dev).
 */
EXTERN struct driver_endpoints {
endpoint_t driver_e;	/* endpoint of the driver for this major device;
			 * device.c sets it to NONE when the driver is dead */
} driver_endpoints[NR_DEVICES];
#endif /* EXT2_DRIVERS_H */

35
servers/ext2/fs.h Normal file
View file

@ -0,0 +1,35 @@
/* This is the master header for fs. It includes some other files
* and defines the principal constants.
*/
#ifndef EXT2_FS_H
#define EXT2_FS_H
#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */
#define _MINIX 1 /* tell headers to include MINIX stuff */
#define _SYSTEM 1 /* tell headers that this is the kernel */
#define VERBOSE 0 /* show messages during initialization? */
/* The following are so basic, all the *.c files get them automatically. */
#include <minix/config.h> /* MUST be first */
#include <ansi.h> /* MUST be second */
#include <sys/types.h>
#include <minix/const.h>
#include <minix/type.h>
#include <minix/dmap.h>
#include <limits.h>
#include <errno.h>
#include <minix/syslib.h>
#include <minix/sysutil.h>
#include "const.h"
#include "type.h"
#include "proto.h"
#include "glo.h"
#define ext2_debug printf
#endif /* EXT2_FS_H */

62
servers/ext2/glo.h Normal file
View file

@ -0,0 +1,62 @@
/* EXTERN should be extern except for the table file */
#ifndef EXT2_GLO_H
#define EXT2_GLO_H
#ifdef _TABLE
#undef EXTERN
#define EXTERN
#endif
#include <minix/vfsif.h>
/* The following variables are used for returning results to the caller. */
EXTERN int err_code; /* temporary storage for error number */
EXTERN int rdwt_err; /* status of last disk i/o request */
EXTERN int cch[NR_INODES];
extern char dot1[2]; /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */
extern char dot2[3]; /* meaning to search_dir: no access permission check. */
extern _PROTOTYPE (int (*fs_call_vec[]), (void) ); /* fs call table */
EXTERN message fs_m_in;
EXTERN message fs_m_out;
EXTERN vfs_ucred_t credentials;
EXTERN uid_t caller_uid;
EXTERN gid_t caller_gid;
EXTERN int req_nr;
EXTERN endpoint_t SELF_E;
EXTERN char user_path[PATH_MAX+1]; /* pathname to be processed */
EXTERN dev_t fs_dev; /* The device that is handled by this FS proc
*/
EXTERN char fs_dev_label[16]; /* Name of the device driver that is handled
* by this FS proc.
*/
EXTERN int unmountdone;
EXTERN int exitsignaled;
/* our block size. */
EXTERN unsigned int fs_block_size;
/* Buffer cache. */
EXTERN struct buf *buf;
EXTERN struct buf **buf_hash; /* the buffer hash table */
EXTERN unsigned int nr_bufs;
EXTERN int may_use_vmcache;
/* Little hack for syncing group descriptors. */
EXTERN int group_descriptors_dirty;
EXTERN struct opt opt; /* global options */
/* ext2 stores its metadata in little-endian format, so bytes must be
 * swapped when running on a big-endian CPU. */
EXTERN int le_CPU; /* little/big endian, if TRUE do not swap bytes */
#endif /* EXT2_GLO_H */

476
servers/ext2/ialloc.c Normal file
View file

@ -0,0 +1,476 @@
/* This file manages inode allocation and deallocation.
*
* The entry points into this file are:
* alloc_inode: allocate a new, unused inode.
* free_inode: mark an inode as available for a new file.
*
* Created (alloc_inode/free_inode/wipe_inode are from MFS):
* June 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <string.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include "const.h"
FORWARD _PROTOTYPE( bit_t alloc_inode_bit, (struct super_block *sp,
struct inode *parent,
int is_dir));
FORWARD _PROTOTYPE( void free_inode_bit, (struct super_block *sp,
bit_t bit_returned,
int is_dir));
FORWARD _PROTOTYPE( void wipe_inode, (struct inode *rip));
/*===========================================================================*
* alloc_inode *
*===========================================================================*/
PUBLIC struct inode *alloc_inode(struct inode *parent, mode_t bits)
{
/* Allocate a free inode on parent's dev, and return a pointer to it.
 * Returns NULL with err_code set to EROFS on a read-only file system or to
 * ENFILE when no inode bit could be allocated. NULL is also returned when
 * get_inode() has no inode table slot; the bit is freed again in that case.
 */
  register struct inode *rip;
  register struct super_block *sp;
  int dev_major, dev_minor, inumb;
  int is_dir = ((bits & I_TYPE) == I_DIRECTORY);
  bit_t b;

  sp = get_super(parent->i_dev);	/* get pointer to super_block */

  /* Can't allocate an inode on a read only device. */
  if (sp->s_rd_only) {
	err_code = EROFS;
	return(NULL);
  }

  /* Acquire an inode from the bit map. */
  b = alloc_inode_bit(sp, parent, is_dir);
  if (b == NO_BIT) {
	err_code = ENFILE;
	dev_major = (int) (sp->s_dev >> MAJOR) & BYTE;
	dev_minor = (int) (sp->s_dev >> MINOR) & BYTE;
	ext2_debug("Out of i-nodes on device %d/%d\n", dev_major, dev_minor);
	return(NULL);
  }

  inumb = (int) b;		/* be careful not to pass unshort as param */

  /* Try to acquire a slot in the inode table. */
  rip = get_inode(NO_DEV, inumb);
  if (rip == NULL) {
	/* No inode table slots available. Free the inode just allocated. */
	free_inode_bit(sp, b, is_dir);
	return(rip);
  }

  /* An inode slot is available. Put the inode just allocated into it. */
  rip->i_mode = bits;			/* set up RWX bits */
  rip->i_links_count = NO_LINK;		/* initial no links */
  rip->i_uid = caller_uid;		/* file's uid is owner's */
  rip->i_gid = caller_gid;		/* ditto group id */
  rip->i_dev = parent->i_dev;		/* mark which device it is on */
  rip->i_sp = sp;			/* pointer to super block */

  /* The remaining fields are reset by wipe_inode(). */
  wipe_inode(rip);

  return(rip);
}
/*===========================================================================*
* free_inode *
*===========================================================================*/
PUBLIC void free_inode(
  register struct inode *rip	/* inode to free */
)
{
/* Return an inode to the pool of unallocated inodes: clear its bit in the
 * inode bitmap and mark the in-memory inode as not allocated. Nothing is
 * done when the inode number is outside (NO_ENTRY, s_inodes_count].
 */
  register struct super_block *sp;
  dev_t dev = rip->i_dev;
  bit_t b = rip->i_num;
  u16_t mode = rip->i_mode;

  /* Locate the appropriate super_block. */
  sp = get_super(dev);

  if (b > NO_ENTRY && b <= sp->s_inodes_count) {
	free_inode_bit(sp, b, (mode & I_TYPE) == I_DIRECTORY);
	rip->i_mode = I_NOT_ALLOC;	/* clear I_TYPE field */
  }
}
FORWARD _PROTOTYPE( int find_group_dir, (struct super_block *sp,
struct inode *parent) );
FORWARD _PROTOTYPE( int find_group_hashalloc, (struct super_block *sp,
struct inode *parent) );
FORWARD _PROTOTYPE( int find_group_any, (struct super_block *sp,
struct inode *parent) );
FORWARD _PROTOTYPE( int find_group_orlov, (struct super_block *sp,
struct inode *parent) );
/*===========================================================================*
* alloc_inode_bit *
*===========================================================================*/
PRIVATE bit_t alloc_inode_bit(sp, parent, is_dir)
struct super_block *sp;		/* the filesystem to allocate from */
struct inode *parent;		/* parent of newly allocated inode */
int is_dir;			/* inode will be a directory if it is TRUE */
{
/* Pick a block group according to the allocation options, set a free bit in
 * that group's inode bitmap and update the free-inode (and, for directories,
 * directory) counters of the group and the super block.
 * Returns the number of the allocated inode, or NO_BIT when no group with a
 * free inode was found. Panics on a read-only file system and when the
 * allocated number is out of range.
 */
  int group;
  ino_t inumber = NO_BIT;
  bit_t bit;
  struct buf *bp;
  struct group_desc *gd;

  if (sp->s_rd_only)
	panic("can't alloc inode on read-only filesys.");

  if (opt.mfsalloc) {
	group = find_group_any(sp, parent);
  } else if (!is_dir) {
	group = find_group_hashalloc(sp, parent);
  } else if (opt.use_orlov) {
	group = find_group_orlov(sp, parent);
  } else {
	group = find_group_dir(sp, parent);
  }

  /* Check if we have a group where to allocate an inode */
  if (group == -1)
	return(NO_BIT);	/* no bit could be allocated */

  gd = get_group_desc(group);
  if (gd == NULL)
	panic("can't get group_desc to alloc inode");

  /* find_group_* should always return either a group with
   * a free inode slot or -1, which we checked earlier.
   */
  ASSERT(gd->free_inodes_count);

  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);
  bit = setbit(bp->b_bitmap, sp->s_inodes_per_group, 0);
  ASSERT(bit != -1); /* group definitely contains a free inode */

  /* Bits are counted from 0 within a group, inode numbers from 1. */
  inumber = group * sp->s_inodes_per_group + bit + 1;

  /* Extra checks before real allocation.
   * Only major bug can cause problems. Since setbit changed
   * bp->b_bitmap there is no way to recover from this bug.
   * Should never happen.
   */
  if (inumber > sp->s_inodes_count) {
	/* Adjacent literals instead of a backslash continuation, which
	 * pulled the source indentation into the message.
	 */
	panic("ext2: allocator returned inum greater than "
		"total number of inodes.\n");
  }
  if (inumber < EXT2_FIRST_INO(sp)) {
	panic("ext2: allocator tried to use reserved inode.\n");
  }

  bp->b_dirt = DIRTY;
  put_block(bp, MAP_BLOCK);

  gd->free_inodes_count--;
  sp->s_free_inodes_count--;
  if (is_dir) {
	gd->used_dirs_count++;
	sp->s_dirs_counter++;
  }
  group_descriptors_dirty = DIRTY;

  /* Almost the same as previous 'group' ASSERT */
  ASSERT(inumber != NO_BIT);
  return inumber;
}
/*===========================================================================*
* free_inode_bit *
*===========================================================================*/
PRIVATE void free_inode_bit(struct super_block *sp, bit_t bit_returned,
				int is_dir)
{
/* Return an inode by turning off its bitmap bit, and update the free-inode
 * (and, for directories, directory) counters of its group and of the super
 * block. 'bit_returned' is the inode number; 'is_dir' tells whether the
 * inode was a directory. Panics on a read-only file system, on an inode
 * number outside [EXT2_FIRST_INO, s_inodes_count] and when the bit was not
 * set.
 */
  int group;		/* group number of bit_returned */
  int bit;		/* bit_returned number within its group */
  struct buf *bp;
  struct group_desc *gd;

  if (sp->s_rd_only)
	panic("can't free bit on read-only filesys.");

  /* At first search group, to which bit_returned belongs to
   * and figure out in what word bit is stored.
   */
  if (bit_returned > sp->s_inodes_count ||
      bit_returned < EXT2_FIRST_INO(sp))
	panic("trying to free inode %d beyond inodes scope.", bit_returned);

  /* Inode numbers start at 1, bitmap bits at 0. */
  group = (bit_returned - 1) / sp->s_inodes_per_group;
  bit = (bit_returned - 1) % sp->s_inodes_per_group; /* index in bitmap */

  gd = get_group_desc(group);
  if (gd == NULL)
	panic("can't get group_desc to free inode");

  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);

  /* The original passed bit_returned without a conversion specifier, so
   * the inode number never showed up in the message.
   */
  if (unsetbit(bp->b_bitmap, bit))
	panic("Tried to free unused inode %d", bit_returned);

  bp->b_dirt = DIRTY;
  put_block(bp, MAP_BLOCK);

  gd->free_inodes_count++;
  sp->s_free_inodes_count++;
  if (is_dir) {
	gd->used_dirs_count--;
	sp->s_dirs_counter--;
  }
  group_descriptors_dirty = DIRTY;

  /* Let find_group_any() start its next search no later than here. */
  if (group < sp->s_igsearch)
	sp->s_igsearch = group;
}
/* Implemented very close to Linux' find_group_dir(): among the groups that
 * have at least one free inode and no fewer free inodes than the per-group
 * average, return the one with the most free blocks (the first such group on
 * a tie), or -1 when no group qualifies. 'parent' is not used.
 */
PRIVATE int find_group_dir(struct super_block *sp, struct inode *parent)
{
  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
  struct group_desc *cur, *winner = NULL;
  int g, winner_group = -1;

  for (g = 0; g < sp->s_groups_count; ++g) {
	cur = get_group_desc(g);
	if (cur == NULL)
		panic("can't get group_desc to alloc inode");

	if (cur->free_inodes_count != 0 &&
	    cur->free_inodes_count >= avefreei) {
		if (winner == NULL ||
		    cur->free_blocks_count > winner->free_blocks_count) {
			winner = cur;
			winner_group = g;
		}
	}
  }

  return winner_group;	/* group or -1 */
}
/* Analog of ffs_hashalloc() from *BSD.
 * 1) Check the parent's group for free inodes and blocks.
 * 2) Rehash on the group number, doubling the step each time.
 * 3) Make a linear search for a free inode.
 * Returns the chosen group, or -1 when no group has a free inode.
 */
PRIVATE int find_group_hashalloc(struct super_block *sp, struct inode *parent)
{
  int ngroups = sp->s_groups_count;
  struct group_desc *gd;
  int candidate, step, tries;
  int parent_group = (parent->i_num - 1) / sp->s_inodes_per_group;

  /* Try to place new inode in its parent group */
  gd = get_group_desc(parent_group);
  if (gd == NULL)
	panic("can't get group_desc to alloc inode");
  if (gd->free_inodes_count && gd->free_blocks_count)
	return parent_group;

  /* We can't allocate inode in the parent's group.
   * Now we will try to place it in another blockgroup.
   * The main idea is still to keep files from the same
   * directory together and use different blockgroups for
   * files from another directory, which lives in the same
   * blockgroup as our parent.
   * Thus we will spread things on the disk.
   */
  candidate = (parent_group + parent->i_num) % ngroups;

  /* Probe with steps 1, 2, 4, ... for a group with free inodes and blocks. */
  step = 1;
  while (step < ngroups) {
	candidate += step;
	if (candidate >= ngroups)
		candidate -= ngroups;

	gd = get_group_desc(candidate);
	if (gd == NULL)
		panic("can't get group_desc to alloc inode");
	if (gd->free_inodes_count && gd->free_blocks_count)
		return candidate;

	step <<= 1;
  }

  /* Still no group for new inode, try linear search.
   * Also check parent again (but for free inodes only).
   */
  candidate = parent_group;
  for (tries = ngroups; tries > 0; tries--) {
	if (candidate >= ngroups)
		candidate = 0;

	gd = get_group_desc(candidate);
	if (gd == NULL)
		panic("can't get group_desc to alloc inode");
	if (gd->free_inodes_count)
		return candidate;

	candidate++;
  }

  return -1;
}
/* Find the first group, starting at sp->s_igsearch, that has a free inode
 * slot, and remember it in sp->s_igsearch. Returns -1 when there is none.
 * This is similar to what MFS does. 'parent' is not used.
 */
PRIVATE int find_group_any(struct super_block *sp, struct inode *parent)
{
  int ngroups = sp->s_groups_count;
  struct group_desc *gd;
  int g = sp->s_igsearch;

  while (g < ngroups) {
	gd = get_group_desc(g);
	if (gd == NULL)
		panic("can't get group_desc to alloc inode");

	if (gd->free_inodes_count) {
		sp->s_igsearch = g;
		return g;
	}
	g++;
  }

  return -1;
}
/* We try to spread first-level directories (i.e. directories in the root
 * or in a directory marked as TOPDIR). Starting at a random group, all groups
 * are scanned and the result is, in order of preference:
 *  - the group with the lowest directory count among those whose free inode
 *    and free block counts are not below the per-group average;
 *  - the last scanned group with such counts;
 *  - the last scanned group with at least one free inode.
 *
 * For other directories we scan from the parent's group for a 'good' group.
 * A group is 'good' if it has at least one free inode and at least
 * min_inodes free inodes and min_blocks free blocks. If there is none we
 * fall back to the last scanned group with a free inode.
 *
 * Returns the chosen group, or -1 when no group has a free inode.
 */
PRIVATE int find_group_orlov(struct super_block *sp, struct inode *parent)
{
  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
  int avefreeb = sp->s_free_blocks_count / sp->s_groups_count;
  int group = -1;
  int fallback_group = -1; /* Group with at least 1 free inode */
  struct group_desc *gd;
  int i;

  if (parent->i_num == ROOT_INODE ||
      parent->i_flags & EXT2_TOPDIR_FL) {
	int best_group = -1;
	int best_avefree_group = -1; /* Best value of avefreei/avefreeb */
	int best_ndir = sp->s_inodes_per_group;

	/* Reduce the random number to a valid group. Without this the
	 * wrap-around check below reset almost every start to group 0,
	 * so the scan was not randomized at all.
	 */
	group = (int) ((unsigned int) random() % sp->s_groups_count);

	for (i = 0; i < sp->s_groups_count; i++, group++) {
		if (group >= sp->s_groups_count)
			group = 0;

		gd = get_group_desc(group);
		if (gd == NULL)
			panic("can't get group_desc to alloc inode");

		if (gd->free_inodes_count == 0)
			continue;
		fallback_group = group;

		if (gd->free_inodes_count < avefreei ||
		    gd->free_blocks_count < avefreeb)
			continue;
		best_avefree_group = group;

		if (gd->used_dirs_count >= best_ndir)
			continue;
		best_ndir = gd->used_dirs_count;
		best_group = group;
	}

	if (best_group >= 0)
		return best_group;
	if (best_avefree_group >= 0)
		return best_avefree_group;
	return fallback_group;
  } else {
	int parent_group = (parent->i_num - 1) / sp->s_inodes_per_group;
	/* 2 is kind of random thing for now,
	 * but performance results are still good.
	 */
	int min_blocks = avefreeb / 2;
	int min_inodes = avefreei / 2;

	group = parent_group;
	for (i = 0; i < sp->s_groups_count; i++, group++) {
		if (group >= sp->s_groups_count)
			group = 0;

		gd = get_group_desc(group);
		if (gd == NULL)
			panic("can't get group_desc to alloc inode");

		if (gd->free_inodes_count == 0)
			continue;
		fallback_group = group;

		if (gd->free_inodes_count >= min_inodes &&
		    gd->free_blocks_count >= min_blocks)
			return group;
	}

	return fallback_group;
  }
}
/*===========================================================================*
* wipe_inode *
*===========================================================================*/
PRIVATE void wipe_inode(
  register struct inode *rip	/* the inode to be erased */
)
{
/* Erase some fields in the inode: size, block count, flags, generation,
 * ACL and fragment fields and all block pointers. All three time stamps are
 * marked for update and the inode is marked dirty.
 * In this file it is only called from alloc_inode(), for a newly allocated
 * inode.
 */
  register int i;

  rip->i_size = 0;
  rip->i_update = ATIME | CTIME | MTIME;	/* update all times later */
  rip->i_blocks = 0;
  rip->i_flags = 0;
  rip->i_generation = 0;
  rip->i_file_acl = 0;
  rip->i_dir_acl = 0;
  rip->i_faddr = 0;

  /* The loop covers every slot, i_block[0] included. */
  for (i = 0; i < EXT2_N_BLOCKS; i++)
	rip->i_block[i] = NO_BLOCK;

  rip->i_dirt = DIRTY;
}

426
servers/ext2/inode.c Normal file
View file

@ -0,0 +1,426 @@
/* This file manages the inode table. There are procedures to allocate and
* deallocate inodes, acquire, erase, and release them, and read and write
* them from the disk.
*
* The entry points into this file are
* get_inode: search inode table for a given inode; if not there,
* read it
* put_inode: indicate that an inode is no longer needed in memory
* update_times: update atime, ctime, and mtime
* rw_inode: read a disk block and extract an inode, or corresp. write
* dup_inode: indicate that someone else is using an inode table entry
* find_inode: retrieve pointer to inode in inode cache
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <string.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
FORWARD _PROTOTYPE( void icopy, (struct inode *rip, d_inode *dip,
int direction, int norm));
FORWARD _PROTOTYPE( void addhash_inode, (struct inode *node) );
FORWARD _PROTOTYPE( void unhash_inode, (struct inode *node) );
/*===========================================================================*
* fs_putnode *
*===========================================================================*/
PUBLIC int fs_putnode(void)
{
/* Find the inode specified by the request message and decrease its counter.*/
struct inode *rip;
int count;
rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
if (!rip) {
printf("%s:%d put_inode: inode #%d dev: %d not found\n", __FILE__,
__LINE__, (ino_t) fs_m_in.REQ_INODE_NR, fs_dev);
panic("fs_putnode failed");
}
count = fs_m_in.REQ_COUNT;
if (count <= 0) {
printf("%s:%d put_inode: bad value for count: %d\n", __FILE__,
__LINE__, count);
panic("fs_putnode failed");
} else if (count > rip->i_count) {
printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__,
__LINE__, count, rip->i_count);
panic("fs_putnode failed");
}
/* Decrease reference counter, but keep one reference;
* it will be consumed by put_inode().
*/
rip->i_count -= count - 1;
put_inode(rip);
return(OK);
}
/*===========================================================================*
* init_inode_cache *
*===========================================================================*/
PUBLIC void init_inode_cache()
{
/* Reset the inode cache: clear the hit/miss counters, empty the unused list
 * and every hash list, then mark each slot of the inode table as free
 * (NO_ENTRY) and put it on the unused list.
 */
  struct inode *rip;
  struct inodelist *rlp;
  unsigned long h;
  int n;

  inode_cache_hit = 0;
  inode_cache_miss = 0;

  /* init free/unused list */
  TAILQ_INIT(&unused_inodes);

  /* init hash lists */
  for (h = 0; h < INODE_HASH_SIZE; h++) {
	rlp = &hash_inodes[h];
	LIST_INIT(rlp);
  }

  /* add free inodes to unused/free list */
  for (n = 0; n < NR_INODES; n++) {
	rip = &inode[n];
	rip->i_num = NO_ENTRY;
	TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
  }
}
/*===========================================================================*
* addhash_inode *
*===========================================================================*/
PRIVATE void addhash_inode(struct inode *node)
{
/* Put 'node' at the head of the hash list selected by the low bits
 * (INODE_HASH_MASK) of its inode number.
 */
  int bucket = node->i_num & INODE_HASH_MASK;

  LIST_INSERT_HEAD(&hash_inodes[bucket], node, i_hash);
}
/*===========================================================================*
* unhash_inode *
*===========================================================================*/
PRIVATE void unhash_inode(struct inode *node)
{
/* Unlink 'node' from the hash chain it is currently on. The caller must make
 * sure the node really is on a chain: get_inode() only calls this when
 * i_num != NO_ENTRY, and put_inode() calls it before setting i_num to
 * NO_ENTRY.
 */
LIST_REMOVE(node, i_hash);
}
/*===========================================================================*
* get_inode *
*===========================================================================*/
PUBLIC struct inode *get_inode(
  dev_t dev,			/* device on which inode resides */
  ino_t numb			/* inode number (ANSI: may not be unshort) */
)
{
/* Return the in-core inode for (dev, numb) with its reference count raised.
 * A cached copy found through the hash table is reused; otherwise the first
 * slot on the unused list is recycled, loaded from disk (unless dev is
 * NO_DEV) and hashed. Returns NULL with err_code set to ENFILE when the
 * unused list is empty.
 */
  register struct inode *rip;
  int bucket;
  int k;

  bucket = (int) numb & INODE_HASH_MASK;

  /* Look for the inode on its hash chain first. */
  LIST_FOREACH(rip, &hash_inodes[bucket], i_hash) {
	if (rip->i_num != numb || rip->i_dev != dev)
		continue;

	/* Found. A zero count means it sits on the unused list: take it off. */
	if (rip->i_count == 0) {
		inode_cache_hit++;
		TAILQ_REMOVE(&unused_inodes, rip, i_unused);
	}
	rip->i_count++;
	return(rip);
  }

  inode_cache_miss++;

  /* Not cached; a slot from the unused list is needed. */
  if (TAILQ_EMPTY(&unused_inodes)) {
	err_code = ENFILE;
	return(NULL);
  }
  rip = TAILQ_FIRST(&unused_inodes);

  /* A slot that still caches some other inode must leave its hash chain. */
  if (rip->i_num != NO_ENTRY)
	unhash_inode(rip);

  /* The slot is in use from now on. */
  TAILQ_REMOVE(&unused_inodes, rip, i_unused);

  /* Fill in the identity and read the disk part of the inode. */
  rip->i_dev = dev;
  rip->i_num = numb;
  rip->i_count = 1;
  if (dev != NO_DEV)
	rw_inode(rip, READING);		/* get inode from disk */

  /* Reset the in-core-only bookkeeping. */
  rip->i_update = 0;			/* all the times are initially up-to-date */
  rip->i_last_dpos = 0;			/* no dentries searched for yet */
  rip->i_bsearch = NO_BLOCK;
  rip->i_last_pos_bl_alloc = 0;
  rip->i_last_dentry_size = 0;
  rip->i_mountpoint = FALSE;
  rip->i_preallocation = opt.use_prealloc;
  rip->i_prealloc_index = 0;
  rip->i_prealloc_count = 0;

  for (k = 0; k < EXT2_PREALLOC_BLOCKS; k++) {
	if (rip->i_prealloc_blocks[k] == NO_BLOCK)
		continue;

	/* Actually this should never happen */
	free_block(rip->i_sp, rip->i_prealloc_blocks[k]);
	rip->i_prealloc_blocks[k] = NO_BLOCK;
	ext2_debug("Warning: Unexpected preallocated block.");
  }

  /* Make the inode findable through the hash table. */
  addhash_inode(rip);

  return(rip);
}
/*===========================================================================*
* find_inode *
*===========================================================================*/
PUBLIC struct inode *find_inode(
  dev_t dev,			/* device on which inode resides */
  ino_t numb			/* inode number (ANSI: may not be unshort) */
)
{
/* Look up an inode that is currently in use (i_count > 0) by device and inode
 * number. The reference count is left untouched. Returns NULL if there is no
 * such inode in the hash table.
 */
  struct inode *rip;
  int bucket;

  bucket = (int) numb & INODE_HASH_MASK;

  LIST_FOREACH(rip, &hash_inodes[bucket], i_hash) {
	if (rip->i_count <= 0)
		continue;		/* cached but not in use */
	if (rip->i_num == numb && rip->i_dev == dev)
		return(rip);
  }

  return(NULL);
}
/*===========================================================================*
* put_inode *
*===========================================================================*/
PUBLIC void put_inode(
  register struct inode *rip	/* pointer to inode to be released */
)
{
/* The caller is no longer using this inode. If no one else is using it either
 * write it back to the disk immediately. If it has no links, truncate it and
 * return it to the pool of available inodes. A NULL pointer is accepted and
 * ignored.
 */

  if (rip == NULL)
	return;		/* checking here is easier than in caller */

  /* Report the offending count as well; the format needs a conversion for
   * the extra argument to show up in the panic message.
   */
  if (rip->i_count < 1)
	panic("put_inode: i_count already below 1: %d", rip->i_count);

  if (--rip->i_count == 0) {	/* i_count == 0 means no one is using it now */
	if (rip->i_links_count == NO_LINK) {
		/* i_links_count == NO_LINK means free the inode. */
		/* return all the disk blocks */

		/* Ignore errors by truncate_inode in case inode is a block
		 * special or character special file.
		 */
		(void) truncate_inode(rip, (off_t) 0);
		/* free inode clears I_TYPE field, since it's used there */
		rip->i_dirt = DIRTY;
		free_inode(rip);
	}

	rip->i_mountpoint = FALSE;
	if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING);

	discard_preallocated_blocks(rip); /* Return blocks to the filesystem */

	if (rip->i_links_count == NO_LINK) {
		/* free, put at the front of the LRU list */
		unhash_inode(rip);
		rip->i_num = NO_ENTRY;
		TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
	} else {
		/* unused, put at the back of the LRU (cache it) */
		TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused);
	}
  }
}
/*===========================================================================*
* update_times *
*===========================================================================*/
PUBLIC void update_times(
  register struct inode *rip	/* pointer to inode to be read/written */
)
{
/* System calls that must change atime, ctime or mtime only set the matching
 * bit in i_update, because fetching the time means sending a message to the
 * clock task. This routine turns the pending bits into real time stamps and
 * clears them. Nothing is done for an inode on a read-only file system.
 */
  time_t now;

  if (rip->i_sp->s_rd_only)
	return;			/* no updates for read-only file systems */

  now = clock_time();

  if (rip->i_update & ATIME) rip->i_atime = now;
  if (rip->i_update & CTIME) rip->i_ctime = now;
  if (rip->i_update & MTIME) rip->i_mtime = now;

  rip->i_update = 0;		/* they are all up-to-date now */
}
/*===========================================================================*
* rw_inode *
*===========================================================================*/
PUBLIC void rw_inode(
  register struct inode *rip,	/* pointer to inode to be read/written */
  int rw_flag			/* READING or WRITING */
)
{
/* Copy one inode between the in-core inode table and the disk block that
 * stores it. As a side effect rip->i_sp is (re)set to the super block of
 * rip->i_dev and the inode is marked CLEAN.
 */
  struct super_block *sp;
  struct group_desc *gd;
  struct buf *bp;
  d_inode *dip;
  u32_t group;
  block_t blk, off;

  sp = get_super(rip->i_dev);	/* get pointer to super block */
  rip->i_sp = sp;		/* inode must contain super block pointer */

  /* Inode numbers start at 1; locate the block group holding this one. */
  group = (rip->i_num - 1) / sp->s_inodes_per_group;
  gd = get_group_desc(group);
  if (gd == NULL)
	panic("can't get group_desc to read/write inode");

  /* Byte offset of the inode within its group's inode table. */
  off = ((rip->i_num - 1) % sp->s_inodes_per_group) * EXT2_INODE_SIZE(sp);

  /* A block holds several inodes, so the byte offset has to be shifted to
   * find the table block, e.g. inode 2 is stored in block 0.
   */
  blk = (block_t) gd->inode_table + (off >> sp->s_blocksize_bits);
  bp = get_block(rip->i_dev, blk, NORMAL);

  /* What is left of the offset locates the inode inside that block. */
  off &= (sp->s_block_size - 1);
  dip = (d_inode*) (bp->b_data + off);

  /* Do the read or write. */
  if (rw_flag == WRITING) {
	if (rip->i_update)
		update_times(rip);	/* times need updating */
	if (sp->s_rd_only == FALSE)
		bp->b_dirt = DIRTY;
  }

  icopy(rip, dip, rw_flag, TRUE);

  put_block(bp, INODE_BLOCK);
  rip->i_dirt = CLEAN;
}
/*===========================================================================*
* icopy *
*===========================================================================*/
PRIVATE void icopy(
register struct inode *rip, /* pointer to the in-core inode struct */
register d_inode *dip, /* pointer to the on-disk struct */
int direction, /* READING (from disk) or WRITING (to disk) */
int norm /* TRUE = do not swap bytes; FALSE = swap */
)
{
int i;
if (direction == READING) {
/* Copy inode to the in-core table, swapping bytes if need be. */
rip->i_mode = conv2(norm,dip->i_mode);
rip->i_uid = conv2(norm,dip->i_uid);
rip->i_size = conv4(norm,dip->i_size);
rip->i_atime = conv4(norm,dip->i_atime);
rip->i_ctime = conv4(norm,dip->i_ctime);
rip->i_mtime = conv4(norm,dip->i_mtime);
rip->i_dtime = conv4(norm,dip->i_dtime);
rip->i_gid = conv2(norm,dip->i_gid);
rip->i_links_count = conv2(norm,dip->i_links_count);
rip->i_blocks = conv4(norm,dip->i_blocks);
rip->i_flags = conv4(norm,dip->i_flags);
/* Minix doesn't touch osd1 and osd2 either, so just copy. */
memcpy(&rip->osd1, &dip->osd1, sizeof(rip->osd1));
for (i = 0; i < EXT2_N_BLOCKS; i++)
rip->i_block[i] = conv4(norm, dip->i_block[i]);
rip->i_generation = conv4(norm,dip->i_generation);
rip->i_file_acl = conv4(norm,dip->i_file_acl);
rip->i_dir_acl = conv4(norm,dip->i_dir_acl);
rip->i_faddr = conv4(norm,dip->i_faddr);
memcpy(&rip->osd2, &dip->osd2, sizeof(rip->osd2));
} else {
/* Copying inode to disk from the in-core table. */
dip->i_mode = conv2(norm,rip->i_mode);
dip->i_uid = conv2(norm,rip->i_uid);
dip->i_size = conv4(norm,rip->i_size);
dip->i_atime = conv4(norm,rip->i_atime);
dip->i_ctime = conv4(norm,rip->i_ctime);
dip->i_mtime = conv4(norm,rip->i_mtime);
dip->i_dtime = conv4(norm,rip->i_dtime);
dip->i_gid = conv2(norm,rip->i_gid);
dip->i_links_count = conv2(norm,rip->i_links_count);
dip->i_blocks = conv4(norm,rip->i_blocks);
dip->i_flags = conv4(norm,rip->i_flags);
/* Minix doesn't touch osd1 and osd2 either, so just copy. */
memcpy(&dip->osd1, &rip->osd1, sizeof(dip->osd1));
for (i = 0; i < EXT2_N_BLOCKS; i++)
dip->i_block[i] = conv4(norm, rip->i_block[i]);
dip->i_generation = conv4(norm,rip->i_generation);
dip->i_file_acl = conv4(norm,rip->i_file_acl);
dip->i_dir_acl = conv4(norm,rip->i_dir_acl);
dip->i_faddr = conv4(norm,rip->i_faddr);
memcpy(&dip->osd2, &rip->osd2, sizeof(dip->osd2));
}
}
/*===========================================================================*
* dup_inode *
*===========================================================================*/
PUBLIC void dup_inode(
  struct inode *ip		/* The inode to be duplicated. */
)
{
/* Take one more reference on an inode the caller already holds a pointer to.
 * This is the cheap counterpart of get_inode(): no lookup, just a higher
 * reference count.
 */
  ip->i_count += 1;
}

120
servers/ext2/inode.h Normal file
View file

@ -0,0 +1,120 @@
/* Inode table. This table holds inodes that are currently in use. In some
* cases they have been opened by an open() or creat() system call, in other
* cases the file system itself needs the inode for one reason or another,
* such as to search a directory for a path name.
* The first part of the struct holds fields that are present on the
* disk; the second part holds fields not present on the disk.
* The disk inode part is also declared in "type.h" as 'd_inode'
*
*/
#ifndef EXT2_INODE_H
#define EXT2_INODE_H
#include <sys/queue.h>
/* Disk part of inode structure was taken from
* linux/include/linux/ext2_fs.h.
*/
/* In-core inode. The fields up to and including 'osd2' are the ones icopy()
 * transfers to and from the on-disk d_inode; everything after them exists in
 * memory only. 'inode' is the table of NR_INODES slots that get_inode() hands
 * out and put_inode() takes back.
 */
EXTERN struct inode {
u16_t i_mode; /* File mode */
u16_t i_uid; /* Low 16 bits of Owner Uid */
u32_t i_size; /* Size in bytes */
u32_t i_atime; /* Access time */
u32_t i_ctime; /* Inode change time (set when CTIME is pending in i_update) */
u32_t i_mtime; /* Modification time */
u32_t i_dtime; /* Deletion Time */
u16_t i_gid; /* Low 16 bits of Group Id */
u16_t i_links_count; /* Links count */
u32_t i_blocks; /* 512-byte blocks count */
u32_t i_flags; /* File flags */
union {
struct {
u32_t l_i_reserved1;
} linux1;
struct {
u32_t h_i_translator;
} hurd1;
struct {
u32_t m_i_reserved1;
} masix1;
} osd1; /* OS dependent 1; copied verbatim by icopy() */
u32_t i_block[EXT2_N_BLOCKS]; /* Pointers to blocks; also holds the target
* text of a fast symlink (see fs_rdlink)
*/
u32_t i_generation; /* File version (for NFS) */
u32_t i_file_acl; /* File ACL */
u32_t i_dir_acl; /* Directory ACL */
u32_t i_faddr; /* Fragment address */
union {
struct {
u8_t l_i_frag; /* Fragment number */
u8_t l_i_fsize; /* Fragment size */
u16_t i_pad1;
u16_t l_i_uid_high; /* these 2 fields */
u16_t l_i_gid_high; /* were reserved2[0] */
u32_t l_i_reserved2;
} linux2;
struct {
u8_t h_i_frag; /* Fragment number */
u8_t h_i_fsize; /* Fragment size */
u16_t h_i_mode_high;
u16_t h_i_uid_high;
u16_t h_i_gid_high;
u32_t h_i_author;
} hurd2;
struct {
u8_t m_i_frag; /* Fragment number */
u8_t m_i_fsize; /* Fragment size */
u16_t m_pad1;
u32_t m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2; copied verbatim by icopy() */
/* The following items are not present on the disk. */
dev_t i_dev; /* which device is the inode on */
ino_t i_num; /* inode number on its (minor) device; NO_ENTRY when
* the slot holds no inode at all
*/
int i_count; /* # times inode used; 0 means the slot is on the
* unused list (either free or merely cached)
*/
struct super_block *i_sp; /* pointer to super block for inode's device */
char i_dirt; /* CLEAN or DIRTY */
block_t i_bsearch; /* where to start search for new blocks,
* also this is last allocated block.
*/
off_t i_last_pos_bl_alloc; /* last write position for which we allocated
* a new block (should be block i_bsearch).
* used to check for sequential operation.
*/
off_t i_last_dpos; /* where to start dentry search */
int i_last_dentry_size; /* size of last found dentry */
char i_mountpoint; /* true if mounted on */
char i_seek; /* set on LSEEK, cleared on READ/WRITE */
char i_update; /* the ATIME, CTIME, and MTIME bits are here */
block_t i_prealloc_blocks[EXT2_PREALLOC_BLOCKS]; /* preallocated blocks */
int i_prealloc_count; /* number of preallocated blocks */
int i_prealloc_index; /* index into i_prealloc_blocks */
int i_preallocation; /* use preallocation for this inode, normally
* it's reset only when non-sequential write
* happens.
*/
LIST_ENTRY(inode) i_hash; /* hash list */
TAILQ_ENTRY(inode) i_unused; /* free and unused list */
} inode[NR_INODES];
/* List of unused/free inodes. put_inode() queues free slots at the head and
 * still-cached inodes at the tail; get_inode() recycles from the head.
 */
EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes;
/* Inode hash table, indexed by inode number masked with INODE_HASH_MASK. */
EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE];
/* Cache statistics kept by get_inode(): a hit is counted when an inode with
 * i_count == 0 is found in the hash table and revived, a miss when the inode
 * is not in the hash table at all.
 */
EXTERN unsigned int inode_cache_hit;
EXTERN unsigned int inode_cache_miss;
/* Field values. Note that CLEAN and DIRTY are defined in "const.h" */
#define NO_SEEK 0 /* i_seek = NO_SEEK if last op was not SEEK */
#define ISEEK 1 /* i_seek = ISEEK if last op was SEEK */
#endif /* EXT2_INODE_H */

713
servers/ext2/link.c Normal file
View file

@ -0,0 +1,713 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <sys/stat.h>
#include <string.h>
#include <minix/com.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
/* Private pseudo error code used inside fs_rename() when the old and the new
 * name refer to the same inode; it is turned into OK before returning.
 */
#define SAME 1000
FORWARD _PROTOTYPE( int freesp_inode, (struct inode *rip, off_t st,
off_t end) );
FORWARD _PROTOTYPE( int remove_dir, (struct inode *rldirp,
struct inode *rip, char dir_name[NAME_MAX + 1]) );
FORWARD _PROTOTYPE( int unlink_file, (struct inode *dirp,
struct inode *rip, char file_name[NAME_MAX + 1]));
/* NOTE(review): the definition of nextblock() declares its second parameter
 * as 'unsigned short block_size' in old-style form, while this prototype says
 * 'int blocksize' - confirm the two are meant to agree.
 */
FORWARD _PROTOTYPE( off_t nextblock, (off_t pos, int blocksize) );
FORWARD _PROTOTYPE( void zeroblock_half, (struct inode *i, off_t p, int l));
FORWARD _PROTOTYPE( void zeroblock_range, (struct inode *i, off_t p, off_t h));
/* Args to zeroblock_half() */
#define FIRST_HALF 0
#define LAST_HALF 1
/*===========================================================================*
* fs_link *
*===========================================================================*/
PUBLIC int fs_link()
{
/* Perform the link(name1, name2) system call.
 *
 * The request supplies the inode to link to (REQ_INODE_NR), the directory
 * that receives the new entry (REQ_DIR_INO) and, through a grant, the last
 * component of the new name. Returns OK on success, otherwise an error code
 * (ENAMETOOLONG, EMLINK, EPERM, EEXIST, EINVAL, or whatever the copy,
 * advance() or search_dir() report). Every inode obtained here is released
 * again on every exit path.
 */
  struct inode *ip, *rip;
  register int r;
  char string[NAME_MAX + 1];
  struct inode *new_ip;
  phys_bytes len;

  /* Copy the link name's last component */
  len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */
  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
	return(ENAMETOOLONG);

  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, 0,
		       (vir_bytes) string, (size_t) len, D);
  if (r != OK) return r;
  NUL(string, len, sizeof(string));

  /* Temporarily open the file. */
  if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL)
	return(EINVAL);

  /* Check to see if the file has maximum number of links already. */
  r = OK;
  if (rip->i_links_count >= USHRT_MAX)
	r = EMLINK;
  if(rip->i_links_count >= LINK_MAX)
	r = EMLINK;

  /* Only super_user may link to directories. */
  if(r == OK)
	if( (rip->i_mode & I_TYPE) == I_DIRECTORY && caller_uid != SU_UID)
		r = EPERM;

  /* If error with 'name', return the inode. */
  if (r != OK) {
	put_inode(rip);
	return(r);
  }

  /* Temporarily open the last dir. If that fails, the reference taken on
   * 'rip' above has to be given back, or the inode stays pinned forever.
   */
  if( (ip = get_inode(fs_dev, fs_m_in.REQ_DIR_INO)) == NULL) {
	put_inode(rip);
	return(EINVAL);
  }

  /* If 'name2' exists in full (even if no space) set 'r' to error. */
  if ((new_ip = advance(ip, string, IGN_PERM)) == NULL) {
	r = err_code;
	if(r == ENOENT)
		r = OK;
  } else {
	put_inode(new_ip);
	r = EEXIST;
  }

  /* Try to link. */
  if(r == OK)
	r = search_dir(ip, string, &rip->i_num, ENTER, IGN_PERM,
		       rip->i_mode & I_TYPE);

  /* If success, register the linking. */
  if(r == OK) {
	rip->i_links_count++;
	rip->i_update |= CTIME;
	rip->i_dirt = DIRTY;
  }

  /* Done. Release both inodes. */
  put_inode(rip);
  put_inode(ip);
  return(r);
}
/*===========================================================================*
* fs_unlink *
*===========================================================================*/
PUBLIC int fs_unlink()
{
/* Handle both unlink(name) and rmdir(name); which one is wanted is told by
 * the request type. The request names the parent directory (REQ_INODE_NR)
 * and passes the last path component through a grant. REQ_UNLINK refuses
 * directories with EPERM; any other request type is treated as RMDIR and
 * handed to remove_dir().
 */
  register struct inode *rip;
  struct inode *rldirp;
  int r;
  char string[NAME_MAX + 1];
  phys_bytes len;

  /* Fetch the last component of the path. */
  len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */
  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
	return(ENAMETOOLONG);

  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
		       (vir_bytes) 0, (vir_bytes) string, (size_t) len, D);
  if (r != OK) return r;
  NUL(string, len, sizeof(string));

  /* Temporarily open the parent directory. */
  rldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (rldirp == NULL)
	return(EINVAL);

  /* The directory exists; now look the name up in it. */
  rip = advance(rldirp, string, IGN_PERM);
  r = err_code;

  if (r != OK) {
	/* A mount point cannot be removed; advance() handed out an inode
	 * in that case, so give it back.
	 */
	if (r == EENTERMOUNT || r == ELEAVEMOUNT) {
		put_inode(rip);
		r = EBUSY;
	}
	put_inode(rldirp);
	return(r);
  }

  /* Now test if the call is allowed, separately for unlink() and rmdir(). */
  if (fs_m_in.m_type != REQ_UNLINK) {
	r = remove_dir(rldirp, rip, string);	/* call is RMDIR */
  } else if ((rip->i_mode & I_TYPE) == I_DIRECTORY) {
	r = EPERM;				/* unlink() of a directory */
  } else {
	/* Actually try to unlink the file; fails if parent is mode 0 etc. */
	r = unlink_file(rldirp, rip, string);
  }

  /* If unlink was possible, it has been done, otherwise it has not. */
  put_inode(rip);
  put_inode(rldirp);
  return(r);
}
/*===========================================================================*
* fs_rdlink *
*===========================================================================*/
PUBLIC int fs_rdlink()
{
/* Perform the readlink() request: copy the target text of the symbolic link
 * REQ_INODE_NR to the caller's grant, at most REQ_MEM_SIZE bytes, and report
 * the number of bytes copied in RES_NBYTES.
 *
 * A link whose size exceeds MAX_FAST_SYMLINK_LENGTH keeps its text in its
 * first data block; a shorter ("fast") link keeps it in i_block[] inside the
 * inode and owns no data block at all.
 *
 * Returns OK, EINVAL if the inode cannot be obtained, EACCES if the inode is
 * not a symbolic link, EIO if the link text cannot be read, or the error
 * from the copy to the caller.
 */
  block_t b;			/* block containing link text */
  struct buf *bp = NULL;	/* buffer containing link text, if any */
  char* link_text = NULL;	/* either bp->b_data or rip->i_block */
  register struct inode *rip;	/* target inode */
  register int r;		/* return value */
  size_t copylen;

  copylen = min( (size_t) fs_m_in.REQ_MEM_SIZE, UMAX_FILE_POS);

  /* Temporarily open the file. */
  if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
	return(EINVAL);

  if (!S_ISLNK(rip->i_mode)) {
	/* The remaining branches must not overwrite this verdict. */
	r = EACCES;
  } else if (rip->i_size > MAX_FAST_SYMLINK_LENGTH) {
	/* normal symlink */
	if ((b = read_map(rip, (off_t) 0)) == NO_BLOCK) {
		r = EIO;
	} else if ((bp = get_block(rip->i_dev, b, NORMAL)) == NULL) {
		/* Check the buffer before looking at its data. */
		r = EIO;
	} else {
		link_text = bp->b_data;
		r = OK;
	}
  } else {
	/* fast symlink, stored in inode */
	link_text = (char*) rip->i_block;
	r = OK;
  }

  if (r == OK) {
	/* Passed all checks */
	/* We can safely cast to unsigned, because copylen is guaranteed to be
	   below max file size */
	copylen = min( copylen, (unsigned) rip->i_size);
	r = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
			   (vir_bytes) 0, (vir_bytes) link_text,
			   (size_t) copylen, D);
	if (r == OK)
		fs_m_out.RES_NBYTES = copylen;
  }

  /* Only a normal symlink holds a buffer; it was fetched exactly once, so it
   * is released exactly once.
   */
  if (bp != NULL)
	put_block(bp, DIRECTORY_BLOCK);

  put_inode(rip);
  return(r);
}
/*===========================================================================*
* remove_dir *
*===========================================================================*/
PRIVATE int remove_dir(rldirp, rip, dir_name)
struct inode *rldirp;			/* parent directory */
struct inode *rip;			/* directory to be removed */
char dir_name[NAME_MAX + 1];		/* name of directory to be removed */
{
/* Remove a directory. Five conditions have to be met:
 *	- The file must be a directory
 *	- The directory must be empty (except for . and ..)
 *	- The final component of the path must not be . or ..
 *	- The directory must not be the root of a mounted file system (VFS)
 *	- The directory must not be anybody's root/working directory (VFS)
 * Returns OK once the entry in the parent is gone, otherwise the error of the
 * first check that failed.
 */
  int r;

  /* search_dir checks that rip is a directory too. */
  r = search_dir(rip, "", NULL, IS_EMPTY, IGN_PERM, 0);
  if (r != OK)
	return r;

  if (strcmp(dir_name, ".") == 0)
	return(EINVAL);
  if (strcmp(dir_name, "..") == 0)
	return(EINVAL);

  if (rip->i_num == ROOT_INODE)
	return(EBUSY);		/* can't remove 'root' */

  /* Actually try to unlink the file; fails if parent is mode 0 etc. */
  r = unlink_file(rldirp, rip, dir_name);
  if (r != OK)
	return r;

  /* Unlink . and .. from the dir. The super user can link and unlink any dir,
   * so don't make too many assumptions about them; failures are ignored.
   */
  (void) unlink_file(rip, NULL, dot1);
  (void) unlink_file(rip, NULL, dot2);

  return(OK);
}
/*===========================================================================*
* unlink_file *
*===========================================================================*/
PRIVATE int unlink_file(dirp, rip, file_name)
struct inode *dirp;		/* parent directory of file */
struct inode *rip;		/* inode of file, may be NULL too. */
char file_name[NAME_MAX + 1];	/* name of file to be removed */
{
/* Delete the entry 'file_name' from directory 'dirp' and, if that worked,
 * lower the link count of the file's inode. 'rip' must be the inode of
 * 'file_name', or NULL to have it looked up here. The reference this routine
 * takes on the inode is always dropped again before returning.
 */
  ino_t numb;			/* inode number */
  int r;

  if (rip != NULL) {
	/* The caller's pointer gives faster access; take our own reference
	 * so the put_inode() below balances out.
	 */
	dup_inode(rip);
  } else {
	/* Search for file in directory and try to get its inode. */
	err_code = search_dir(dirp, file_name, &numb, LOOK_UP, IGN_PERM, 0);
	if (err_code == OK) rip = get_inode(dirp->i_dev, (int) numb);
	if (err_code != OK || rip == NULL) return(err_code);
  }

  r = search_dir(dirp, file_name, NULL, DELETE, IGN_PERM, 0);
  if (r == OK) {
	rip->i_links_count--;	/* entry deleted from parent's dir */
	rip->i_update |= CTIME;
	rip->i_dirt = DIRTY;
  }

  put_inode(rip);
  return(r);
}
/*===========================================================================*
* fs_rename *
*===========================================================================*/
PUBLIC int fs_rename()
{
/* Perform the rename(name1, name2) system call.
 *
 * The request supplies the old and new parent directories
 * (REQ_REN_OLD_DIR, REQ_REN_NEW_DIR) and, through grants, the last component
 * of the old and of the new name. Returns OK on success (also when old and
 * new turn out to be the same inode), otherwise an error code.
 *
 * Reference counting: every inode obtained here is released exactly once.
 * An inode pointer that has been released early is set to NULL right away,
 * so the common release code at the end (put_inode() accepts NULL) does not
 * drop the same reference a second time.
 */
  struct inode *old_dirp, *old_ip;	/* ptrs to old dir, file inodes */
  struct inode *new_dirp, *new_ip;	/* ptrs to new dir, file inodes */
  struct inode *new_superdirp, *next_new_superdirp;
  int r = OK;				/* error flag; initially no error */
  int odir, ndir;			/* TRUE iff {old|new} file is dir */
  int same_pdir;			/* TRUE iff parent dirs are the same */
  char old_name[NAME_MAX + 1], new_name[NAME_MAX + 1];
  ino_t numb;
  phys_bytes len;

  /* Copy the last component of the old name */
  len = fs_m_in.REQ_REN_LEN_OLD; /* including trailing '\0' */
  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
	return(ENAMETOOLONG);

  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_OLD,
		       (vir_bytes) 0, (vir_bytes) old_name, (size_t) len, D);
  if (r != OK) return r;
  NUL(old_name, len, sizeof(old_name));

  /* Copy the last component of the new name */
  len = fs_m_in.REQ_REN_LEN_NEW; /* including trailing '\0' */
  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
	return(ENAMETOOLONG);

  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_NEW,
		       (vir_bytes) 0, (vir_bytes) new_name, (size_t) len, D);
  if (r != OK) return r;
  NUL(new_name, len, sizeof(new_name));

  /* Get old dir inode */
  if( (old_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_OLD_DIR)) == NULL)
	return(err_code);

  old_ip = advance(old_dirp, old_name, IGN_PERM);
  r = err_code;

  if (r == EENTERMOUNT || r == ELEAVEMOUNT) {
	/* The inode is given back here; forget the pointer so that it is
	 * neither inspected nor released again further down.
	 */
	put_inode(old_ip);
	old_ip = NULL;
	if (r == EENTERMOUNT) r = EXDEV;	/* should this fail at all? */
	else if (r == ELEAVEMOUNT) r = EINVAL;	/* rename on dot-dot */
  }

  /* Get new dir inode. Without it nothing below can work (advance() needs a
   * directory to search), so release what we hold and report the error.
   */
  if( (new_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_NEW_DIR)) == NULL) {
	put_inode(old_ip);
	put_inode(old_dirp);
	return(err_code);
  }

  new_ip = advance(new_dirp, new_name, IGN_PERM); /* not required to exist */

  /* However, if the check failed because the file does exist, don't continue.
   * Note that ELEAVEMOUNT is covered by the dot-dot check later. */
  if(err_code == EENTERMOUNT) {
	put_inode(new_ip);
	new_ip = NULL;			/* already released, see above */
	r = EBUSY;
  }

  if(old_ip != NULL)
	odir = ((old_ip->i_mode & I_TYPE) == I_DIRECTORY); /* TRUE iff dir */
  else
	odir = FALSE;

  /* If it is ok, check for a variety of possible errors. */
  if(r == OK) {
	same_pdir = (old_dirp == new_dirp);

	/* The old inode must not be a superdirectory of the new last dir. */
	if (odir && !same_pdir) {
		dup_inode(new_superdirp = new_dirp);
		while (TRUE) {	/* may hang in a file system loop */
			if (new_superdirp == old_ip) {
				put_inode(new_superdirp);
				r = EINVAL;
				break;
			}
			next_new_superdirp = advance(new_superdirp, dot2,
						     IGN_PERM);

			put_inode(new_superdirp);
			if(next_new_superdirp == new_superdirp) {
				put_inode(new_superdirp);
				break;
			}
			if(err_code == ELEAVEMOUNT) {
				/* imitate that we are back at the root,
				 * cross device checked already on VFS */
				put_inode(next_new_superdirp);
				err_code = OK;
				break;
			}
			new_superdirp = next_new_superdirp;
			if(new_superdirp == NULL) {
				/* Missing ".." entry. Assume the worst. */
				r = EINVAL;
				break;
			}
		}
	}

	/* The old or new name must not be . or .. */
	if(strcmp(old_name, ".") == 0 || strcmp(old_name, "..") == 0 ||
	   strcmp(new_name, ".") == 0 || strcmp(new_name, "..") == 0) {
		r = EINVAL;
	}
	/* Both parent directories must be on the same device.
	if(old_dirp->i_dev != new_dirp->i_dev) r = EXDEV; */

	/* Some tests apply only if the new path exists. */
	if(new_ip == NULL) {
		/* don't rename a file with a file system mounted on it.
		if (old_ip->i_dev != old_dirp->i_dev) r = EXDEV;*/
		if(odir && (new_dirp->i_links_count >= SHRT_MAX ||
			    new_dirp->i_links_count >= LINK_MAX) &&
		   !same_pdir && r == OK) {
			r = EMLINK;
		}
	} else {
		if(old_ip == new_ip) r = SAME; /* old=new */

		ndir = ((new_ip->i_mode & I_TYPE) == I_DIRECTORY);/* dir ? */
		if(odir == TRUE && ndir == FALSE) r = ENOTDIR;
		if(odir == FALSE && ndir == TRUE) r = EISDIR;
	}
  }

  /* If a process has another root directory than the system root, we might
   * "accidently" be moving it's working directory to a place where it's
   * root directory isn't a super directory of it anymore. This can make
   * the function chroot useless. If chroot will be used often we should
   * probably check for it here. */

  /* The rename will probably work. Only two things can go wrong now:
   * 1. being unable to remove the new file. (when new file already exists)
   * 2. being unable to make the new directory entry. (new file doesn't exists)
   *     [directory has to grow by one block and cannot because the disk
   *      is completely full].
   */
  if(r == OK) {
	if(new_ip != NULL) {
		/* There is already an entry for 'new'. Try to remove it. */
		if(odir)
			r = remove_dir(new_dirp, new_ip, new_name);
		else
			r = unlink_file(new_dirp, new_ip, new_name);
	}
	/* if r is OK, the rename will succeed, while there is now an
	 * unused entry in the new parent directory. */
  }

  if(r == OK) {
	/* If the new name will be in the same parent directory as the old
	 * one, first remove the old name to free an entry for the new name,
	 * otherwise first try to create the new name entry to make sure
	 * the rename will succeed.
	 */
	numb = old_ip->i_num;		/* inode number of old file */

	if(same_pdir) {
		r = search_dir(old_dirp,old_name, NULL, DELETE,IGN_PERM, 0);
						/* shouldn't go wrong. */
		if(r == OK)
			(void) search_dir(old_dirp, new_name, &numb, ENTER, IGN_PERM,
					  old_ip->i_mode & I_TYPE);
	} else {
		r = search_dir(new_dirp, new_name, &numb, ENTER, IGN_PERM,
			       old_ip->i_mode & I_TYPE);
		if(r == OK)
			(void) search_dir(old_dirp, old_name, (ino_t *) 0, DELETE,
					  IGN_PERM, 0);
	}
  }
  /* If r is OK, the ctime and mtime of old_dirp and new_dirp have been marked
   * for update in search_dir. */

  if(r == OK && odir && !same_pdir) {
	/* Update the .. entry in the directory (still points to old_dirp).*/
	numb = new_dirp->i_num;
	(void) unlink_file(old_ip, NULL, dot2);
	if(search_dir(old_ip, dot2, &numb, ENTER, IGN_PERM, I_DIRECTORY) == OK) {
		/* New link created. */
		new_dirp->i_links_count++;
		new_dirp->i_dirt = DIRTY;
	}
  }

  /* Release the inodes. Pointers that are NULL (never obtained, or already
   * released above) are ignored by put_inode().
   */
  put_inode(old_dirp);
  put_inode(old_ip);
  put_inode(new_dirp);
  put_inode(new_ip);
  return(r == SAME ? OK : r);
}
/*===========================================================================*
* fs_ftrunc *
*===========================================================================*/
PUBLIC int fs_ftrunc(void)
{
/* Handle a truncate/free-space request on an inode that is already in use.
 * An end position of 0 means "set the size to 'start'"; any other end
 * position asks for the byte range [start, end) to be freed.
 * Returns EINVAL if the inode is not in use, otherwise the result of the
 * operation carried out.
 */
  struct inode *rip;
  off_t start, end;

  rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (rip == NULL)
	return(EINVAL);

  start = fs_m_in.REQ_TRC_START_LO;
  end = fs_m_in.REQ_TRC_END_LO;

  if (end != 0)
	return(freesp_inode(rip, start, end));

  return(truncate_inode(rip, start));
}
/*===========================================================================*
* truncate_inode *
*===========================================================================*/
PUBLIC int truncate_inode(rip, newsize)
register struct inode *rip;	/* pointer to inode to be truncated */
off_t newsize;			/* inode must become this size */
{
/* Give the inode the size 'newsize'. Shrinking frees the blocks that are no
 * longer referenced; growing leaves a hole that reads as zeroes. The new
 * size is stored in the inode and ctime/mtime are marked for update.
 *
 * Files opened with O_APPEND need no special care: that mode is a property
 * of the file descriptor and is looked at when a write is done.
 *
 * Returns EINVAL for character and block special files, EFBIG when newsize
 * exceeds the file system's maximum, an error from freesp_inode(), or OK.
 */
  mode_t kind;
  int r;

  discard_preallocated_blocks(rip);

  /* Special files cannot be truncated. */
  kind = rip->i_mode & I_TYPE;
  if (kind == I_CHAR_SPECIAL || kind == I_BLOCK_SPECIAL)
	return(EINVAL);

  /* Don't let the inode grow too big. */
  if (newsize > rip->i_sp->s_max_size)
	return(EFBIG);

  /* Shrinking: give back the space beyond the new end. */
  if (newsize < rip->i_size) {
	r = freesp_inode(rip, newsize, rip->i_size);
	if (r != OK)
		return(r);
  }

  /* Growing: wipe what is left of the current last block. */
  if (newsize > rip->i_size)
	zeroblock_half(rip, rip->i_size, LAST_HALF);

  /* Finally record the new size. */
  rip->i_size = newsize;
  rip->i_update |= CTIME | MTIME;
  rip->i_dirt = DIRTY;

  return(OK);
}
/*===========================================================================*
* freesp_inode *
*===========================================================================*/
PRIVATE int freesp_inode(rip, start, end)
register struct inode *rip;	/* pointer to inode to be partly freed */
off_t start, end;		/* range of bytes to free (end uninclusive) */
{
/* Punch the hole [start, end) into an inode. Whether the inode type makes
 * sense for this is the caller's business, because this routine serves
 * several purposes with different sets of acceptable types. The inode size
 * is not changed here; the caller adjusts it if it wants to.
 *
 * Current users are truncate_inode() (which frees indirect and data blocks
 * for any type of inode and also implements ftruncate() and truncate()) and
 * the F_FREESP fcntl().
 */
  unsigned short bsize = rip->i_sp->s_block_size;
  off_t blk, stop_blk;
  int r;
  int partial_head, partial_tail;

  discard_preallocated_blocks(rip);

  /* Either hole or symlink. Freeing fast symlink using write_map() causes
   * segfaults since it doesn't use any blocks, but uses i_block[] to store
   * target.
   */
  if (rip->i_blocks == 0)
	return(OK);

  if (end > rip->i_size)	/* freeing beyond end makes no sense */
	end = rip->i_size;
  if (end <= start)		/* end is uninclusive, so start<end */
	return(EINVAL);

  /* Does the range start or stop in the middle of a block that stays
   * partly in use?
   */
  partial_head = start % bsize;
  partial_tail = end % bsize && end < rip->i_size;

  if (start/bsize == (end-1)/bsize && (partial_head || partial_tail)) {
	/* The range lies within one block that is kept: zero only a part
	 * of it.
	 */
	zeroblock_range(rip, start, end-start);
  } else {
	/* First zero unused part of partly used blocks. */
	if (partial_head)
		zeroblock_half(rip, start, LAST_HALF);
	if (partial_tail)
		zeroblock_half(rip, end, FIRST_HALF);

	/* Now completely free the completely unused blocks.
	 * write_map() will free unused indirect blocks too. Working with
	 * block numbers avoids overflow of the loop variable that stepping
	 * by byte position (e.g. '+= bsize') could cause.
	 */
	stop_blk = end / bsize;
	if (end == rip->i_size && (end % bsize))
		stop_blk++;

	for (blk = nextblock(start, bsize)/bsize; blk < stop_blk; blk++) {
		r = write_map(rip, blk*bsize, NO_BLOCK, WMAP_FREE);
		if (r != OK)
			return(r);
	}
  }

  rip->i_update |= CTIME | MTIME;
  rip->i_dirt = DIRTY;

  return(OK);
}
/*===========================================================================*
* nextblock *
*===========================================================================*/
PRIVATE off_t nextblock(pos, block_size)
off_t pos;
unsigned short block_size;
{
/* Round 'pos' up to a block boundary: a position that already is the first
 * byte of a block is returned unchanged, any other position yields the first
 * byte of the following block. The rounding is done in steps (down first,
 * then up) rather than in one expression, which could overflow pos.
 */
  off_t rem;

  rem = pos % block_size;
  if (rem == 0)
	return(pos);			/* already on a boundary */

  return(pos - rem + block_size);	/* start of this block, then one up */
}
/*===========================================================================*
* zeroblock_half *
*===========================================================================*/
PRIVATE void zeroblock_half(rip, pos, half)
struct inode *rip;
off_t pos;
int half;
{
/* Zero one side of the block that holds file position 'pos'.
 *
 * LAST_HALF:  zero from 'pos' up to the end of that block.
 * otherwise (FIRST_HALF): zero from the start of that block up to, but not
 *             including, 'pos'.
 *
 * The actual zeroing is delegated to zeroblock_range().
 */
  off_t within, count;

  /* Distance of 'pos' from the start of its block. */
  within = pos % rip->i_sp->s_block_size;

  if (half == LAST_HALF) {
        count = rip->i_sp->s_block_size - within;
        zeroblock_range(rip, pos, count);
  } else {
        /* Start at the block boundary and stop right before 'pos'. */
        zeroblock_range(rip, pos - within, within);
  }
}
/*===========================================================================*
* zeroblock_range *
*===========================================================================*/
PRIVATE void zeroblock_range(rip, pos, len)
struct inode *rip;
off_t pos;
off_t len;
{
/* Zero 'len' bytes of the file 'rip' starting at file position 'pos'.
 * The range must lie entirely inside the single block that holds 'pos';
 * a range that would run past the end of that block causes a panic.
 * Nothing is done when 'len' is zero or when read_map() reports that no
 * block is mapped at 'pos' (NO_BLOCK).
 */
  block_t b;
  struct buf *bp;
  off_t offset;

  if (!len) return;		/* no zeroing to be done. */
  if ( (b = read_map(rip, pos)) == NO_BLOCK) return;
  if ( (bp = get_block(rip->i_dev, b, NORMAL)) == NULL)
        panic("zeroblock_range: no block");

  /* Offset of 'pos' inside its block. */
  offset = pos % rip->i_sp->s_block_size;
  if (offset + len > rip->i_sp->s_block_size)
        panic("zeroblock_range: len too long: %ld", (long) len);

  memset(bp->b_data + offset, 0, len);
  bp->b_dirt = DIRTY;
  put_block(bp, FULL_DATA_BLOCK);
}

236
servers/ext2/main.c Normal file
View file

@ -0,0 +1,236 @@
#include "fs.h"
#include <assert.h>
#include <minix/callnr.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <minix/dmap.h>
#include <minix/endpoint.h>
#include <minix/vfsif.h>
#include "buf.h"
#include "inode.h"
#include "drivers.h"
#include "optset.h"
/* Declare some local functions. */
FORWARD _PROTOTYPE(void get_work, (message *m_in) );
FORWARD _PROTOTYPE(void cch_check, (void) );
FORWARD _PROTOTYPE( void reply, (endpoint_t who, message *m_out) );
/* SEF functions and variables. */
FORWARD _PROTOTYPE( void sef_local_startup, (void) );
FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
/* Program arguments; main() hands argc/argv to env_setargs() and
 * sef_cb_init_fresh() scans these for "-o <options>".
 */
EXTERN int env_argc;
EXTERN char **env_argv;
/* Options recognized in the string that follows "-o". Each entry names an
 * option, its type, the field of 'opt' that receives the value, and a
 * type-dependent value: for OPT_BOOL the value stored when the option is
 * present, for OPT_INT the base handed to strtol() (see optset.h). Some
 * fields appear twice so that an option and its opposite ("orlov"/"oldalloc",
 * "prealloc"/"noprealloc") set the same field to TRUE or FALSE.
 */
PRIVATE struct optset optset_table[] = {
{ "sb", OPT_INT, &opt.block_with_super, 0 },
{ "orlov", OPT_BOOL, &opt.use_orlov, TRUE },
{ "oldalloc", OPT_BOOL, &opt.use_orlov, FALSE },
{ "mfsalloc", OPT_BOOL, &opt.mfsalloc, TRUE },
{ "reserved", OPT_BOOL, &opt.use_reserved_blocks, TRUE },
{ "prealloc", OPT_BOOL, &opt.use_prealloc, TRUE },
{ "noprealloc", OPT_BOOL, &opt.use_prealloc, FALSE },
/* A NULL name terminates the table. */
{ NULL }
};
/*===========================================================================*
* main *
*===========================================================================*/
PUBLIC int main(int argc, char *argv[])
{
/* This is the main routine of this service. The main loop consists of
 * three major activities: getting new work, processing the work, and
 * sending the reply. The loop runs until the file system has been
 * unmounted and a termination signal has been seen.
 */
  int error, ind;
  unsigned short test_endian = 1;

  /* SEF local startup. */
  env_setargs(argc, argv);
  sef_local_startup();

  /* Detect the byte order: on a little endian CPU the first byte of the
   * 16-bit value 1 is nonzero.
   */
  le_CPU = (*(unsigned char *) &test_endian == 0 ? 0 : 1);

  /* Server isn't tested on big endian CPU */
  ASSERT(le_CPU == 1);

  while(!unmountdone || !exitsignaled) {
        endpoint_t src;

        /* Wait for request message. */
        get_work(&fs_m_in);

        src = fs_m_in.m_source;
        error = OK;
        caller_uid = INVAL_UID;	/* To trap errors */
        caller_gid = INVAL_GID;

        /* Request numbers below VFS_BASE are shifted up by VFS_BASE. */
        req_nr = fs_m_in.m_type;
        if (req_nr < VFS_BASE) {
                fs_m_in.m_type += VFS_BASE;
                req_nr = fs_m_in.m_type;
        }

        /* Dispatch through the call vector, rejecting out-of-range calls. */
        ind = req_nr - VFS_BASE;
        if (ind < 0 || ind >= NREQS) {
                printf("ext2: bad request %d\n", req_nr);
                printf("ind = %d\n", ind);
                error = EINVAL;
        } else {
                error = (*fs_call_vec[ind])();
                /*cch_check();*/
        }

        fs_m_out.m_type = error;
        reply(src, &fs_m_out);

        if (error == OK)
                read_ahead();	/* do block read ahead */
  }

  return(0);
}
/*===========================================================================*
* sef_local_startup *
*===========================================================================*/
PRIVATE void sef_local_startup(void)
{
/* Hook this server's callbacks into SEF and then hand control to SEF for
 * the actual startup. There is no live update support for now.
 */

  /* Initialization: a fresh start runs our own init routine, a restart
   * is routed to sef_cb_init_fail.
   */
  sef_setcb_init_fresh(sef_cb_init_fresh);
  sef_setcb_init_restart(sef_cb_init_fail);

  /* Signals are delivered to our own handler. */
  sef_setcb_signal_handler(sef_cb_signal_handler);

  /* All callbacks are in place; start up. */
  sef_startup();
}
/*===========================================================================*
* sef_cb_init_fresh *
*===========================================================================*/
PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
{
/* Fresh-start initialization of the ext2 file server: set the option
 * defaults, apply any "-o" option strings from the command line, reset the
 * inode and driver tables, set up the buffer pool and finally announce
 * ourselves to VFS. Neither 'type' nor 'info' is used.
 */
  int i, r;

  /* Option defaults; "-o" strings below may override them. */
  opt.block_with_super = 0;
  opt.use_orlov = TRUE;
  opt.mfsalloc = FALSE;
  opt.use_reserved_blocks = FALSE;
  opt.use_prealloc = FALSE;

  /* Every "-o" that still has an argument after it supplies an option
   * string; the argument itself is skipped once it has been parsed.
   */
  i = 1;
  while (i < env_argc - 1) {
        if (strcmp(env_argv[i], "-o") == 0) {
                i++;
                optset_parse(optset_table, env_argv[i]);
        }
        i++;
  }

  may_use_vmcache = 1;

  /* No inode is in use yet; clear the matching cch[] counters too. */
  for (i = 0; i < NR_INODES; ++i) {
        cch[i] = 0;
        inode[i].i_count = 0;
  }
  init_inode_cache();

  /* No driver endpoint is known for any device yet. */
  for (i = 0; i < NR_DEVICES; ++i)
        driver_endpoints[i].driver_e = NONE;

  SELF_E = getprocnr();
  buf_pool(DEFAULT_NR_BUFS);
  fs_block_size = _MIN_BLOCK_SIZE;

  /* Tell VFS that we are ready. */
  fs_m_in.m_type = FS_READY;
  r = send(VFS_PROC_NR, &fs_m_in);
  if (r != OK) {
        panic("Error sending login to VFS: %d", r);
  }

  return(OK);
}
/*===========================================================================*
* sef_cb_signal_handler *
*===========================================================================*/
PRIVATE void sef_cb_signal_handler(int signo)
{
/* React to SIGTERM only; every other signal is ignored. On SIGTERM the
 * termination request is recorded and the file system is synced.
 */
  if (signo == SIGTERM) {
        exitsignaled = 1;
        (void) fs_sync();

        /* If unmounting has already been performed, exit immediately.
         * We might not get another message.
         */
        if (unmountdone) exit(0);
  }
}
/*===========================================================================*
* get_work *
*===========================================================================*/
PRIVATE void get_work(m_in)
message *m_in;				/* pointer to message */
{
/* Block until a request from VFS arrives and leave it in '*m_in'. Messages
 * from any other source, and messages from VFS that arrive after the
 * unmount has completed, are reported and dropped. A failing receive
 * causes a panic.
 */
  int r;
  endpoint_t src;

  for (;;) {
        if ((r = sef_receive(ANY, m_in)) != OK)	/* wait for message */
                panic("sef_receive failed: %d", r);

        src = m_in->m_source;
        if (src != VFS_PROC_NR) {
                printf("ext2: unexpected source %d\n", src);
                continue;
        }
        if (unmountdone) {
                printf("ext2: unmounted: unexpected message from FS\n");
                continue;
        }
        break;				/* Normal FS request. */
  }

  assert((src == VFS_PROC_NR && !unmountdone));
}
/*===========================================================================*
* reply *
*===========================================================================*/
PRIVATE void reply(
  endpoint_t who,
  message *m_out			/* report result */
)
{
/* Send the reply message 'm_out' to 'who'. A failed send is only reported;
 * it is not retried.
 */
  int r;

  r = send(who, m_out);			/* send the message */
  if (r != OK)
        printf("ext2(%d) was unable to send reply\n", SELF_E);
}
/*===========================================================================*
* cch_check *
*===========================================================================*/
PRIVATE void cch_check(void)
{
/* Debugging aid: compare the use count of every inode with the value
 * remembered in cch[] at the previous check and report any difference,
 * except after requests for which the check is skipped (the list in the
 * condition below). The remembered counts are refreshed on every call.
 */
  int i;

  for (i = 0; i < NR_INODES; ++i) {
        if (inode[i].i_count != cch[i] && req_nr != REQ_GETNODE &&
            req_nr != REQ_PUTNODE && req_nr != REQ_READSUPER &&
            req_nr != REQ_MOUNTPOINT && req_nr != REQ_UNMOUNT &&
            req_nr != REQ_SYNC && req_nr != REQ_LOOKUP) {
                /* The inode number is printed as an unsigned long; the
                 * cast keeps the argument in step with the format.
                 */
                printf("ext2(%d) inode(%lu) cc: %d req_nr: %d\n", SELF_E,
                        (unsigned long) inode[i].i_num,
                        inode[i].i_count - cch[i], req_nr);
        }
        cch[i] = inode[i].i_count;
  }
}

65
servers/ext2/misc.c Normal file
View file

@ -0,0 +1,65 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <assert.h>
#include <minix/vfsif.h>
#include "inode.h"
#include "super.h"
/*===========================================================================*
* fs_sync *
*===========================================================================*/
PUBLIC int fs_sync()
{
/* Perform the sync() system call: write out everything that is dirty.
 * The order matters. Inodes go first because rw_inode() leaves its results
 * in the block cache, so the blocks must be flushed after them. The
 * superblock is written last. Nothing is done on a read-only file system.
 */
  struct buf *bp;
  int i;

  assert(nr_bufs > 0);
  assert(buf);

  if (superblock->s_rd_only)
        return(OK);			/* nothing to sync */

  /* Write all the dirty inodes that are in use to the disk. */
  for (i = 0; i < NR_INODES; i++) {
        if (inode[i].i_count > 0 && inode[i].i_dirt == DIRTY)
                rw_inode(&inode[i], WRITING);
  }

  /* Write all the dirty blocks to the disk, one drive at a time. */
  bp = &buf[0];
  while (bp < &buf[nr_bufs]) {
        if (bp->b_dev != NO_DEV && bp->b_dirt == DIRTY)
                flushall(bp->b_dev);
        bp++;
  }

  /* Stamp and write the superblock while a device is attached. */
  if (superblock->s_dev != NO_DEV) {
        superblock->s_wtime = clock_time();
        write_super(superblock);
  }

  return(OK);				/* sync() can't fail */
}
/*===========================================================================*
* fs_flush *
*===========================================================================*/
PUBLIC int fs_flush()
{
/* Write the dirty blocks of the device named in the request to disk and
 * then drop that device's blocks from the cache. The device this file
 * system itself lives on is refused with EBUSY.
 */
  dev_t target;

  target = (dev_t) fs_m_in.REQ_DEV;
  if (target == fs_dev)
        return(EBUSY);

  flushall(target);
  invalidate(target);

  return(OK);
}

258
servers/ext2/mount.c Normal file
View file

@ -0,0 +1,258 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <fcntl.h>
#include <string.h>
#include <minix/com.h>
#include <sys/stat.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include "drivers.h"
#include <minix/ds.h>
#include <minix/vfsif.h>
/*===========================================================================*
* fs_readsuper *
*===========================================================================*/
PUBLIC int fs_readsuper()
{
/* This function reads the superblock of the partition, gets the root inode
 * and sends back the details of them. Note, that the FS process does not
 * know the index of the vmnt object which refers to it, whenever the pathname
 * lookup leaves a partition an ELEAVEMOUNT error is transferred back
 * so that the VFS knows that it has to find the vnode on which this FS
 * process' partition is mounted on.
 *
 * Returns OK on success. Once the device has been opened, every failure
 * leaves through the 'fail' label below, which marks the superblock as
 * unused (s_dev = NO_DEV) and closes the device again.
 */
  struct inode *root_ip;
  cp_grant_id_t label_gid;
  size_t label_len;
  int r = OK;
  endpoint_t driver_e;
  int readonly, isroot;
  u32_t mask;

  fs_dev    = fs_m_in.REQ_DEV;
  label_gid = fs_m_in.REQ_GRANT;
  label_len = fs_m_in.REQ_PATH_LEN;
  readonly  = (fs_m_in.REQ_FLAGS & REQ_RDONLY) ? 1 : 0;
  isroot    = (fs_m_in.REQ_FLAGS & REQ_ISROOT) ? 1 : 0;

  if (label_len > sizeof(fs_dev_label))
        return(EINVAL);

  /* Fetch the driver label and translate it into an endpoint. */
  r = sys_safecopyfrom(fs_m_in.m_source, label_gid, 0,
                       (vir_bytes)fs_dev_label, label_len, D);
  if (r != OK) {
        printf("%s:%d fs_readsuper: safecopyfrom failed: %d\n",
               __FILE__, __LINE__, r);
        return(EINVAL);
  }

  r = ds_retrieve_label_endpt(fs_dev_label, &driver_e);
  if (r != OK) {
        printf("ext2:fs_readsuper: ds_retrieve_label_endpt failed for '%s': %d\n",
               fs_dev_label, r);
        return EINVAL;
  }

  /* Map the driver endpoint for this major */
  driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e = driver_e;

  /* Open the device the file system lives on. */
  if (dev_open(driver_e, fs_dev, driver_e,
               readonly ? R_BIT : (R_BIT|W_BIT)) != OK) {
        return(EINVAL);
  }

  /* Fill in the super block. */
  STATICINIT(superblock, sizeof(struct super_block));
  if (!superblock)
        panic("Can't allocate memory for superblock.");

  superblock->s_dev = fs_dev;	/* read_super() needs to know which dev */
  r = read_super(superblock);

  /* Is it recognized as an ext2 file system? */
  if (r != OK)
        goto fail;		/* pass on the error of read_super() */

  if (superblock->s_rev_level != EXT2_GOOD_OLD_REV) {
        struct super_block *sp = superblock; /* just shorter name */

        /* Refuse the mount if the file system uses an incompatible
         * feature that is not supported.
         */
        mask = ~SUPPORTED_INCOMPAT_FEATURES;
        if (HAS_INCOMPAT_FEATURE(sp, mask)) {
                if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_COMPRESSION & mask))
                        printf("ext2: fs compression is not supported by server\n");
                if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_FILETYPE & mask))
                        printf("ext2: fs in dir filetype is not supported by server\n");
                if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_RECOVER & mask))
                        printf("ext2: fs recovery is not supported by server\n");
                if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_JOURNAL_DEV & mask))
                        printf("ext2: fs journal dev is not supported by server\n");
                if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_META_BG & mask))
                        printf("ext2: fs meta bg is not supported by server\n");
                r = EINVAL;
                goto fail;
        }

        /* Likewise for unsupported read-only compatible features. */
        mask = ~SUPPORTED_RO_COMPAT_FEATURES;
        if (HAS_RO_COMPAT_FEATURE(sp, mask)) {
                if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_SPARSE_SUPER & mask)) {
                        printf("ext2: sparse super is not supported by server, "
                               "remount read-only\n");
                }
                if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_LARGE_FILE & mask)) {
                        printf("ext2: large files are not supported by server, "
                               "remount read-only\n");
                }
                if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_BTREE_DIR & mask)) {
                        printf("ext2: dir's btree is not supported by server, "
                               "remount read-only\n");
                }
                r = EINVAL;
                goto fail;
        }
  }

  if (superblock->s_state == EXT2_ERROR_FS) {
        printf("ext2: filesystem wasn't cleanly unmounted previous time\n");
        r = EINVAL;
        goto fail;
  }

  set_blocksize(superblock->s_block_size);

  /* Get the root inode of the mounted file system. */
  if ( (root_ip = get_inode(fs_dev, ROOT_INODE)) == NULL) {
        printf("ext2: couldn't get root inode\n");
        r = EINVAL;
        goto fail;
  }

  if (root_ip->i_mode == 0) {
        printf("%s:%d zero mode for root inode?\n", __FILE__, __LINE__);
        put_inode(root_ip);
        r = EINVAL;
        goto fail;
  }

  if ((root_ip->i_mode & I_TYPE) != I_DIRECTORY) {
        printf("%s:%d root inode has wrong type, it's not a DIR\n",
               __FILE__, __LINE__);
        put_inode(root_ip);
        r = EINVAL;
        goto fail;
  }

  superblock->s_rd_only = readonly;
  superblock->s_is_root = isroot;

  /* On a writable mount the state is set to EXT2_ERROR_FS on disk right
   * away; fs_unmount() sets it back to EXT2_VALID_FS.
   */
  if (!readonly) {
        superblock->s_state = EXT2_ERROR_FS;
        superblock->s_mnt_count++;
        superblock->s_mtime = clock_time();
        write_super(superblock); /* Commit info, we just set above */
  }

  /* Root inode properties */
  fs_m_out.RES_INODE_NR = root_ip->i_num;
  fs_m_out.RES_MODE = root_ip->i_mode;
  fs_m_out.RES_FILE_SIZE_LO = root_ip->i_size;
  fs_m_out.RES_UID = root_ip->i_uid;
  fs_m_out.RES_GID = root_ip->i_gid;

  return(r);

fail:
  /* Undo the device open and mark the superblock as not in use. */
  superblock->s_dev = NO_DEV;
  dev_close(driver_e, fs_dev);
  return(r);
}
/*===========================================================================*
* fs_mountpoint *
*===========================================================================*/
PUBLIC int fs_mountpoint()
{
/* This function looks up the mount point, it checks the condition whether
 * the partition can be mounted on the inode or not. On success the inode
 * is flagged as a mount point.
 *
 * Returns EINVAL if the inode cannot be obtained, EBUSY if it already is a
 * mount point, ENOTDIR if it is a block or character special file, and OK
 * otherwise.
 */
  register struct inode *rip;
  int r = OK;
  mode_t bits;

  /* Temporarily open the file. */
  if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL)
        return(EINVAL);

  if(rip->i_mountpoint) r = EBUSY;

  /* It may not be special. */
  bits = rip->i_mode & I_TYPE;
  if (bits == I_BLOCK_SPECIAL || bits == I_CHAR_SPECIAL) r = ENOTDIR;

  /* Flag the inode while our reference is still held, so that the inode
   * is not touched after put_inode() has released it.
   * NOTE(review): presumably VFS keeps its own reference to this inode
   * for as long as something is mounted on it -- confirm.
   */
  if(r == OK) rip->i_mountpoint = TRUE;

  put_inode(rip);

  return(r);
}
/*===========================================================================*
* fs_unmount *
*===========================================================================*/
PUBLIC int fs_unmount()
{
/* Unmount the file system. Returns EINVAL if the superblock does not
 * belong to our device, EBUSY if inodes other than a single reference to
 * the root inode are still in use, and OK once the unmount is complete.
 */
  int i, in_use;
  struct inode *root_ip;

  if (superblock->s_dev != fs_dev) return(EINVAL);

  /* See if the mounted device is busy. Only 1 inode using it should be
   * open --the root inode-- and that inode only 1 time.
   */
  in_use = 0;
  for (i = 0; i < NR_INODES; i++) {
        if (inode[i].i_count > 0 && inode[i].i_dev == fs_dev)
                in_use += inode[i].i_count;
  }

  root_ip = find_inode(fs_dev, ROOT_INODE);
  if (root_ip == NULL) {
        printf("ext2: couldn't find root inode. Unmount failed.\n");
        panic("ext2: couldn't find root inode");
        return(EINVAL);
  }

  /* Sync fs data before checking count. In some cases VFS can force
   * unmounting and it will damage unsynced FS. We don't sync before
   * checking root_ip since if it is missing then something strange
   * happened with FS, so it's better to not use possibly corrupted data
   * for syncing.
   */
  if (!superblock->s_rd_only)
        (void) fs_sync();	/* force any cached blocks out of memory */

  if (in_use > 1) return(EBUSY);	/* can't umount a busy file system */

  put_inode(root_ip);

  /* On a writable mount, record the write time and set the state to
   * EXT2_VALID_FS on disk.
   */
  if (!superblock->s_rd_only) {
        superblock->s_wtime = clock_time();
        superblock->s_state = EXT2_VALID_FS;
        write_super(superblock);	/* Commit info, we just set above */
  }

  /* Close the device the file system lives on. */
  dev_close(driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e, fs_dev);

  /* Finish off the unmount. */
  superblock->s_dev = NO_DEV;
  unmountdone = TRUE;

  return(OK);
}

355
servers/ext2/open.c Normal file
View file

@ -0,0 +1,355 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <sys/stat.h>
#include <string.h>
#include <minix/com.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
/* Forward declaration of the file-local helper that allocates a new inode
 * and enters it into directory 'ldirp' under the name 'string'.
 */
FORWARD _PROTOTYPE( struct inode *new_node, (struct inode *ldirp,
char *string, mode_t bits, block_t z0));
/*===========================================================================*
* fs_create *
*===========================================================================*/
PUBLIC int fs_create()
{
/* Create a new file in the directory given in the request. On success the
 * properties of the new inode are placed in the reply and the new inode is
 * not released here; the parent directory inode is released in all cases.
 */
  phys_bytes name_len;
  struct inode *dirp, *newp;
  mode_t omode;
  char name[NAME_MAX + 1];
  int r;

  /* Read request message */
  omode = (mode_t) fs_m_in.REQ_MODE;
  caller_uid = (uid_t) fs_m_in.REQ_UID;
  caller_gid = (gid_t) fs_m_in.REQ_GID;

  /* Copy the last component (i.e., file name) */
  name_len = fs_m_in.REQ_PATH_LEN;	/* including trailing '\0' */
  if (name_len > NAME_MAX + 1 || name_len > EXT2_NAME_MAX + 1)
        return(ENAMETOOLONG);

  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
                (vir_bytes) 0, (vir_bytes) name, (size_t) name_len, D);
  if (err_code != OK) return err_code;
  NUL(name, name_len, sizeof(name));

  /* Get last directory inode (i.e., directory that will hold the new
   * inode)
   */
  dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (dirp == NULL)
        return(ENOENT);

  /* Try to make the file; new_node() reports its status in 'err_code'. */
  newp = new_node(dirp, name, omode, NO_BLOCK);
  r = err_code;

  if (r == OK) {
        /* Reply message */
        fs_m_out.RES_INODE_NR = newp->i_num;
        fs_m_out.RES_MODE = newp->i_mode;
        fs_m_out.RES_FILE_SIZE_LO = newp->i_size;

        /* This values are needed for the execution */
        fs_m_out.RES_UID = newp->i_uid;
        fs_m_out.RES_GID = newp->i_gid;

        /* Drop parent dir */
        put_inode(dirp);
        return(OK);
  }

  /* An error occurred: release both inodes. */
  put_inode(dirp);
  put_inode(newp);
  return(r);
}
/*===========================================================================*
* fs_mknod *
*===========================================================================*/
PUBLIC int fs_mknod()
{
/* Create a node with the mode and device number from the request in the
 * given directory. Both the new inode and the directory inode are released
 * again before returning; the result is whatever new_node() left in
 * 'err_code'.
 */
  struct inode *newp, *dirp;
  char name[NAME_MAX + 1];
  phys_bytes name_len;

  /* Copy the last component */
  name_len = fs_m_in.REQ_PATH_LEN;	/* including trailing '\0' */
  if (name_len > NAME_MAX + 1 || name_len > EXT2_NAME_MAX + 1)
        return(ENAMETOOLONG);

  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
                (vir_bytes) 0, (vir_bytes) name, (size_t) name_len, D);
  if (err_code != OK) return err_code;
  NUL(name, name_len, sizeof(name));

  /* Set up caller's user and group id */
  caller_uid = (uid_t) fs_m_in.REQ_UID;
  caller_gid = (gid_t) fs_m_in.REQ_GID;

  /* Get last directory inode */
  dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (dirp == NULL)
        return(ENOENT);

  /* Try to create the new node */
  newp = new_node(dirp, name, (mode_t) fs_m_in.REQ_MODE,
                (block_t) fs_m_in.REQ_DEV);

  put_inode(newp);
  put_inode(dirp);
  return(err_code);
}
/*===========================================================================*
* fs_mkdir *
*===========================================================================*/
PUBLIC int fs_mkdir()
{
/* Create a new directory: make the inode and its entry in the parent
 * directory with new_node(), then enter "." and ".." in the new directory.
 * If either of those two entries cannot be made, the directory entry in the
 * parent is removed again and the error of the failing search_dir() call is
 * returned.
 */
  int r, r1, r2;			/* status codes */
  ino_t dot, dotdot;			/* inode numbers for . and .. */
  struct inode *rip, *ldirp;
  char lastc[NAME_MAX + 1];		/* last component */
  phys_bytes len;

  /* Copy the last component and set up caller's user and group id */
  len = fs_m_in.REQ_PATH_LEN;		/* including trailing '\0' */
  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
        return(ENAMETOOLONG);

  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
                (vir_bytes) 0, (vir_bytes) lastc, (phys_bytes) len, D);
  if(err_code != OK) return(err_code);
  NUL(lastc, len, sizeof(lastc));

  caller_uid = (uid_t) fs_m_in.REQ_UID;
  caller_gid = (gid_t) fs_m_in.REQ_GID;

  /* Get last directory inode */
  if((ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
        return(ENOENT);

  /* Next make the inode. If that fails, return error code. */
  rip = new_node(ldirp, lastc, (mode_t) fs_m_in.REQ_MODE, (block_t) 0);

  if(rip == NULL || err_code == EEXIST) {
        put_inode(rip);		/* can't make dir: it already exists */
        put_inode(ldirp);
        return(err_code);
  }

  /* Get the inode numbers for . and .. to enter in the directory. */
  dotdot = ldirp->i_num;	/* parent's inode number */
  dot = rip->i_num;		/* inode number of the new dir itself */

  /* Now make dir entries for . and .. unless the disk is completely full. */
  /* Use dot1 and dot2, so the mode of the directory isn't important. */
  rip->i_mode = (mode_t) fs_m_in.REQ_MODE;	/* set mode */
  /* enter . in the new dir*/
  r1 = search_dir(rip, dot1, &dot, ENTER, IGN_PERM, I_DIRECTORY);
  /* enter .. in the new dir */
  r2 = search_dir(rip, dot2, &dotdot, ENTER, IGN_PERM, I_DIRECTORY);

  /* If both . and .. were successfully entered, increment the link counts. */
  if (r1 == OK && r2 == OK) {
        /* Normal case. It was possible to enter . and .. in the new dir. */
        rip->i_links_count++;	/* this accounts for . */
        ldirp->i_links_count++;	/* this accounts for .. */
        ldirp->i_dirt = DIRTY;	/* mark parent's inode as dirty */
        r = OK;
  } else {
        /* It was not possible to enter . or .. probably disk was full -
         * links counts haven't been touched. */
        if (search_dir(ldirp, lastc, NULL, DELETE, IGN_PERM, 0) != OK)
                panic("Dir disappeared: %lu", (unsigned long) rip->i_num);
        rip->i_links_count--;	/* undo the increment done in new_node() */

        /* Report the first failure instead of relying on 'err_code',
         * which new_node() left at OK.
         */
        r = (r1 != OK) ? r1 : r2;
  }
  rip->i_dirt = DIRTY;		/* either way, i_links_count has changed */

  put_inode(ldirp);		/* return the inode of the parent dir */
  put_inode(rip);		/* return the inode of the newly made dir */
  return(r);
}
/*===========================================================================*
* fs_slink *
*===========================================================================*/
PUBLIC int fs_slink()
{
/* Create a symbolic link. The inode and its directory entry are made with
 * new_node(); the target is then stored either inside the inode itself
 * (fast symlink, when it fits in MAX_FAST_SYMLINK_LENGTH including the
 * terminating '\0') or in a newly allocated first block. If storing the
 * target fails, the link count is set to NO_LINK and the directory entry is
 * removed again.
 */
  phys_bytes len;
  struct inode *sip;		/* inode containing symbolic link */
  struct inode *ldirp;		/* directory containing link */
  register int r;		/* error code */
  char string[NAME_MAX];	/* last component of the link's path name */
  char* link_target_buf = NULL;	/* either sip->i_block or bp->b_data */
  struct buf *bp = NULL;	/* disk buffer for link */

  caller_uid = (uid_t) fs_m_in.REQ_UID;
  caller_gid = (gid_t) fs_m_in.REQ_GID;

  /* Copy the link name's last component */
  len = fs_m_in.REQ_PATH_LEN;
  if (len > NAME_MAX || len > EXT2_NAME_MAX)
        return(ENAMETOOLONG);

  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
                (vir_bytes) 0, (vir_bytes) string, (size_t) len, D);
  if (r != OK) return(r);
  NUL(string, len, sizeof(string));

  /* Temporarily open the dir. */
  if( (ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
        return(EINVAL);

  /* Create the inode for the symlink. */
  sip = new_node(ldirp, string, (mode_t) (I_SYMBOLIC_LINK | RWX_MODES),
                (block_t) 0);

  /* If we can then create fast symlink (store it in inode),
   * Otherwise allocate a disk block for the contents of the symlink and
   * copy contents of symlink (the name pointed to) into first disk block. */
  if( (r = err_code) == OK) {
        if ( (fs_m_in.REQ_MEM_SIZE + 1) > sip->i_sp->s_block_size) {
                r = ENAMETOOLONG;
        } else if ((fs_m_in.REQ_MEM_SIZE + 1) <= MAX_FAST_SYMLINK_LENGTH) {
                r = sys_safecopyfrom(VFS_PROC_NR,
                                (cp_grant_id_t) fs_m_in.REQ_GRANT3,
                                (vir_bytes) 0, (vir_bytes) sip->i_block,
                                (vir_bytes) fs_m_in.REQ_MEM_SIZE, D);
                sip->i_dirt = DIRTY;
                link_target_buf = (char*) sip->i_block;
        } else if ((bp = new_block(sip, (off_t) 0)) == NULL) {
                /* No block could be allocated; there is no buffer to
                 * touch, so just pass on the error.
                 */
                r = err_code;
        } else {
                r = sys_safecopyfrom(VFS_PROC_NR,
                                (cp_grant_id_t) fs_m_in.REQ_GRANT3,
                                (vir_bytes) 0, (vir_bytes) bp->b_data,
                                (vir_bytes) fs_m_in.REQ_MEM_SIZE, D);
                bp->b_dirt = DIRTY;
                link_target_buf = bp->b_data;
        }

        if (r == OK) {
                link_target_buf[fs_m_in.REQ_MEM_SIZE] = '\0';
                sip->i_size = (off_t) strlen(link_target_buf);
                if (sip->i_size != fs_m_in.REQ_MEM_SIZE) {
                        /* This can happen if the user provides a buffer
                         * with a \0 in it. This can cause a lot of trouble
                         * when the symlink is used later. We could just use
                         * the strlen() value, but we want to let the user
                         * know he did something wrong. ENAMETOOLONG doesn't
                         * exactly describe the error, but there is no
                         * ENAMETOOWRONG.
                         */
                        r = ENAMETOOLONG;
                }
        }

        put_block(bp, DIRECTORY_BLOCK);	/* put_block() accepts NULL. */

        if(r != OK) {
                sip->i_links_count = NO_LINK;
                if (search_dir(ldirp, string, NULL, DELETE, IGN_PERM, 0) != OK)
                        panic("Symbolic link vanished");
        }
  }

  /* put_inode() accepts NULL as a noop, so the below are safe. */
  put_inode(sip);
  put_inode(ldirp);

  return(r);
}
/*===========================================================================*
* new_node *
*===========================================================================*/
PRIVATE struct inode *new_node(struct inode *ldirp,
        char *string, mode_t bits, block_t b0)
{
/* Allocate a new inode of mode 'bits', make a directory entry for it named
 * 'string' in directory 'ldirp', and initialize it; 'b0' is stored in the
 * inode's first block slot. Within this file it is called by fs_create(),
 * fs_mknod(), fs_mkdir() and fs_slink().
 *
 * Result and 'err_code':
 *   - new inode made:             inode pointer, err_code == OK
 *   - name already exists:        pointer to existing inode, EEXIST
 *   - lookup hit a mount crossing (EENTERMOUNT/ELEAVEMOUNT): result of
 *     advance(), EEXIST
 *   - too many links for a new directory: NULL, EMLINK
 *   - any other failure:          NULL, err_code as set by the failing step
 *
 * The caller has to return the directory inode (*ldirp).
 */
  register struct inode *rip;
  register int r;

  /* Get final component of the path. */
  rip = advance(ldirp, string, IGN_PERM);

  if (S_ISDIR(bits) && (ldirp->i_links_count >= USHRT_MAX ||
                        ldirp->i_links_count >= LINK_MAX)) {
        /* New entry is a directory, alas we can't give it a ".." */
        put_inode(rip);
        err_code = EMLINK;
        return(NULL);
  }

  /* Anything other than "name not found" ends here. */
  if (rip != NULL || err_code != ENOENT) {
        if (rip != NULL || err_code == EENTERMOUNT ||
            err_code == ELEAVEMOUNT)
                r = EEXIST;	/* last component exists */
        else
                r = err_code;	/* some other problem */
        err_code = r;
        return(rip);
  }

  /* Last path component does not exist. Make new directory entry. */
  if ( (rip = alloc_inode(ldirp, bits)) == NULL) {
        /* Can't creat new inode: out of inodes. */
        return(NULL);
  }

  /* Force inode to the disk before making directory entry to make
   * the system more robust in the face of a crash: an inode with
   * no directory entry is much better than the opposite.
   */
  rip->i_links_count++;
  rip->i_block[0] = b0;		/* major/minor device numbers */
  rw_inode(rip, WRITING);	/* force inode to disk now */

  /* New inode acquired. Try to make directory entry. */
  r = search_dir(ldirp, string, &rip->i_num, ENTER, IGN_PERM,
                 rip->i_mode & I_TYPE);
  if (r != OK) {
        rip->i_links_count--;	/* pity, have to free disk inode */
        rip->i_dirt = DIRTY;	/* dirty inodes are written out */
        put_inode(rip);		/* this call frees the inode */
        err_code = r;
        return(NULL);
  }

  err_code = r;			/* OK at this point */
  return(rip);
}
/*===========================================================================*
* fs_inhibread *
*===========================================================================*/
PUBLIC int fs_inhibread()
{
/* Inhibit read ahead on the inode named in the request by setting its
 * i_seek field to ISEEK. Returns EINVAL if find_inode() does not find the
 * inode.
 */
  struct inode *rip;

  rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (rip == NULL)
        return(EINVAL);

  rip->i_seek = ISEEK;		/* inhibit read ahead */
  return(OK);
}

128
servers/ext2/optset.c Normal file
View file

@ -0,0 +1,128 @@
/* This file provides functionality to parse strings of comma-separated
* options, each being either a single key name or a key=value pair, where the
* value may be enclosed in quotes. A table of optset entries is provided to
* determine which options are recognized, how to parse their values, and where
* to store those. Unrecognized options are silently ignored; improperly
* formatted options are silently set to reasonably acceptable values.
*
* The entry points into this file are:
* optset_parse parse the given options string using the given table
*
* Created:
* May 2009 (D.C. van Moolenbroek)
*/
#define _MINIX 1
#include <stdlib.h>
#include <string.h>
#include <minix/config.h>
#include <minix/const.h>
#include "optset.h"
FORWARD _PROTOTYPE( void optset_parse_entry, (struct optset *entry,
char *ptr, int len) );
/*===========================================================================*
* optset_parse_entry *
*===========================================================================*/
PRIVATE void optset_parse_entry(entry, ptr, len)
struct optset *entry;
char *ptr;
int len;
{
/* Store the value of one recognized option. 'ptr' and 'len' describe the
 * value text that followed the key; what is stored depends on the type of
 * the table entry:
 *   OPT_BOOL:   the entry's own os_val, the value text is not looked at;
 *   OPT_STRING: the value text, truncated to os_val - 1 characters and
 *               always null-terminated;
 *   OPT_INT:    the value text converted by strtol() with os_val as base,
 *               or 0 when there is no value text.
 * Entries of any other type are left alone.
 */
  char *dst;
  int type = entry->os_type;

  if (type == OPT_BOOL) {
        *((int *) entry->os_ptr) = entry->os_val;
  } else if (type == OPT_STRING) {
        if (len >= entry->os_val)
                len = entry->os_val - 1;	/* leave room for the '\0' */
        dst = (char *) entry->os_ptr;
        if (len > 0)
                memcpy(dst, ptr, len);
        dst[len] = 0;
  } else if (type == OPT_INT) {
        *((int *) entry->os_ptr) =
                (len > 0) ? (int) strtol(ptr, NULL, entry->os_val) : 0;
  }
}
/*===========================================================================*
* optset_parse *
*===========================================================================*/
PUBLIC void optset_parse(table, string)
struct optset *table;
char *string;
{
/* Parse 'string', a comma-separated list of "key" or "key=value" fields,
 * and hand every field whose key matches an entry of 'table' to
 * optset_parse_entry(). Keys are compared case-insensitively and must
 * match an entry name in full; fields with an unknown key are silently
 * skipped. The string itself is not modified.
 */
  char *cur, *key, *value, quote;
  int key_len, value_len, idx;

  cur = string;
  while (*cur != '\0') {
        /* The key runs up to '=', ',' or the end of the string. */
        key = cur;
        while (*cur != '\0' && *cur != '=' && *cur != ',') cur++;
        key_len = (int) (cur - key);

        value = NULL;
        value_len = 0;
        if (*cur == '=') {
                /* The field has an associated value. */
                cur++;
                if (*cur == '\'' || *cur == '"') {
                        /* Quoted value: it ends at the first matching
                         * quote character that is followed by a comma
                         * or by the end of the string. The quotes are
                         * not part of the value.
                         */
                        quote = *cur++;
                        value = cur;
                        while (*cur != '\0' && (*cur != quote ||
                            (cur[1] != '\0' && cur[1] != ',')))
                                cur++;
                        value_len = (int) (cur - value);
                        if (*cur != '\0') cur++;  /* skip closing quote */
                } else {
                        /* Plain value: up to the next comma or the end. */
                        value = cur;
                        while (*cur != '\0' && *cur != ',') cur++;
                        value_len = (int) (cur - value);
                }
        }

        if (*cur == ',') cur++;		/* step over the separator */

        /* Find a matching entry for this key in the given table. If
         * found, store its value; only the first match is used.
         */
        for (idx = 0; table[idx].os_name != NULL; idx++) {
                if (strlen(table[idx].os_name) != (size_t) key_len)
                        continue;
                if (strncasecmp(table[idx].os_name, key, key_len) != 0)
                        continue;
                optset_parse_entry(&table[idx], value, value_len);
                break;
        }
  }
}

30
servers/ext2/optset.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef _OPTSET_H
#define _OPTSET_H
/* Value types an options table entry can have (stored in 'os_type'). */
enum {
/* flag: presence of the option stores the entry's 'os_val' */
OPT_BOOL,
/* text: value is copied into a buffer of 'os_val' bytes */
OPT_STRING,
/* number: value is converted with strtol() using base 'os_val' */
OPT_INT
};
/* An entry for the parser of an options set. The 'os_name' field must point
* to a string, which is treated case-insensitively; the last entry of a table
* must have NULL name. The 'os_type' field must be set to one of the OPT_
* values defined above. The 'os_ptr' field must point to the field that is to
* receive the value of a recognized option. For OPT_STRING, it must point to a
* string of a size set in 'os_val'; the resulting string may be truncated, but
* will always be null-terminated. For OPT_BOOL, it must point to an int which
* will be set to the value in 'os_val' if the option is present. For OPT_INT,
* it must point to an int which will be set to the provided option value;
* 'os_val' is then a base passed to strtol().
*/
/* One entry of an options table; a table ends with an entry whose
 * 'os_name' is NULL.
 */
struct optset {
/* option name, matched case-insensitively */
char *os_name;
/* one of the OPT_ values */
int os_type;
/* where the parsed value is stored: an int, or a char buffer for OPT_STRING */
void *os_ptr;
/* OPT_BOOL: value to store; OPT_STRING: buffer size; OPT_INT: strtol() base */
int os_val;
};
_PROTOTYPE( void optset_parse, (struct optset *table, char *string) );
#endif /* _OPTSET_H */

729
servers/ext2/path.c Normal file
View file

@ -0,0 +1,729 @@
/* This file contains the procedures that look up path names in the directory
* system and determine the inode number that goes with a given path name.
*
* The entry points into this file are
* fs_lookup: handle a lookup request; the 'main' routine of the
* path-to-inode conversion mechanism
* advance: parse one component of a path name
* search_dir: search a directory for a string and return its inode number
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <assert.h>
#include <string.h>
#include <minix/endpoint.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
/* Canonical "." and ".." strings. search_dir() recognizes them by comparing
 * the 'string' pointer itself against dot1/dot2 (not by comparing contents),
 * so a caller that wants to bypass the directory access check must pass
 * exactly these arrays.
 */
PUBLIC char dot1[2] = "."; /* used for search_dir to bypass the access */
PUBLIC char dot2[3] = ".."; /* permissions for . and .. */
/* Helpers private to this file. */
FORWARD _PROTOTYPE( char *get_name, (char *name, char string[NAME_MAX+1]) );
FORWARD _PROTOTYPE( int ltraverse, (struct inode *rip, char *suffix) );
FORWARD _PROTOTYPE( int parse_path, (ino_t dir_ino, ino_t root_ino,
int flags, struct inode **res_inop,
size_t *offsetp, int *symlinkp) );
/*===========================================================================*
* fs_lookup *
*===========================================================================*/
PUBLIC int fs_lookup()
{
cp_grant_id_t grant, grant2;
int r, r1, flags, symlinks;
unsigned int len;
size_t offset = 0, path_size, cred_size;
ino_t dir_ino, root_ino;
struct inode *rip;
grant = (cp_grant_id_t) fs_m_in.REQ_GRANT;
path_size = (size_t) fs_m_in.REQ_PATH_SIZE; /* Size of the buffer */
len = (int) fs_m_in.REQ_PATH_LEN; /* including terminating nul */
dir_ino = (ino_t) fs_m_in.REQ_DIR_INO;
root_ino = (ino_t) fs_m_in.REQ_ROOT_INO;
flags = (int) fs_m_in.REQ_FLAGS;
/* Check length. */
if(len > sizeof(user_path)) return(E2BIG); /* too big for buffer */
if(len == 0) return(EINVAL); /* too small */
/* Copy the pathname and set up caller's user and group id */
r = sys_safecopyfrom(VFS_PROC_NR, grant, /*offset*/ 0,
(vir_bytes) user_path, (size_t) len, D);
if(r != OK) return(r);
/* Verify this is a null-terminated path. */
if(user_path[len - 1] != '\0') return(EINVAL);
if(flags & PATH_GET_UCRED) { /* Do we have to copy uid/gid credentials? */
grant2 = (cp_grant_id_t) fs_m_in.REQ_GRANT2;
cred_size = (size_t) fs_m_in.REQ_UCRED_SIZE;
if (cred_size > sizeof(credentials)) return(EINVAL); /* Too big. */
r = sys_safecopyfrom(VFS_PROC_NR, grant2, (vir_bytes) 0,
(vir_bytes) &credentials, cred_size, D);
if (r != OK) return(r);
caller_uid = (uid_t) credentials.vu_uid;
caller_gid = (gid_t) credentials.vu_gid;
} else {
memset(&credentials, 0, sizeof(credentials));
caller_uid = fs_m_in.REQ_UID;
caller_gid = fs_m_in.REQ_GID;
}
/* Lookup inode */
rip = NULL;
r = parse_path(dir_ino, root_ino, flags, &rip, &offset, &symlinks);
if(symlinks != 0 && (r == ELEAVEMOUNT || r == EENTERMOUNT || r == ESYMLINK)){
len = strlen(user_path)+1;
if(len > path_size) return(ENAMETOOLONG);
r1 = sys_safecopyto(VFS_PROC_NR, grant, (vir_bytes) 0,
(vir_bytes) user_path, (size_t) len, D);
if (r1 != OK) return(r1);
}
if(r == ELEAVEMOUNT || r == ESYMLINK) {
/* Report offset and the error */
fs_m_out.RES_OFFSET = offset;
fs_m_out.RES_SYMLOOP = symlinks;
return(r);
}
if (r != OK && r != EENTERMOUNT) return(r);
fs_m_out.RES_INODE_NR = rip->i_num;
fs_m_out.RES_MODE = rip->i_mode;
fs_m_out.RES_FILE_SIZE_LO = rip->i_size;
fs_m_out.RES_SYMLOOP = symlinks;
fs_m_out.RES_UID = rip->i_uid;
fs_m_out.RES_GID = rip->i_gid;
/* This is only valid for block and character specials. But it doesn't
* cause any harm to set RES_DEV always. */
fs_m_out.RES_DEV = (dev_t) rip->i_block[0];
if(r == EENTERMOUNT) {
fs_m_out.RES_OFFSET = offset;
put_inode(rip); /* Only return a reference to the final object */
}
return(r);
}
/*===========================================================================*
* parse_path *
*===========================================================================*/
PRIVATE int parse_path(dir_ino, root_ino, flags, res_inop, offsetp, symlinkp)
ino_t dir_ino; /* inode number of the directory to start the walk in */
ino_t root_ino; /* inode number of the caller's root; '..' stops here */
int flags; /* only PATH_RET_SYMLINK is examined in this function */
struct inode **res_inop; /* out: resulting inode (OK and EENTERMOUNT only) */
size_t *offsetp; /* in/out: number of characters of user_path consumed */
int *symlinkp; /* out: number of symbolic links expanded */
{
/* Parse the path in user_path, starting at dir_ino. If the path is the empty
 * string, just return dir_ino. It is up to the caller to treat an empty
 * path in a special way. Otherwise, if the path consists of just one or
 * more slash ('/') characters, the path is replaced with ".". Otherwise,
 * just look up the first (or only) component in path after skipping any
 * leading slashes.
 *
 * Return values and inode ownership:
 *   OK, EENTERMOUNT - *res_inop holds an inode whose reference now belongs
 *                     to the caller.
 *   ELEAVEMOUNT, ESYMLINK, ELOOP and all other errors - every reference
 *                     taken here has been released; *res_inop is not set.
 * Whenever a symbolic link is expanded, user_path is rewritten by ltraverse()
 * and *offsetp is reset to zero.
 */
int r, leaving_mount;
struct inode *rip, *dir_ip;
char *cp, *next_cp; /* component and next component */
char component[NAME_MAX+1];
/* Start parsing path at the first component in user_path */
cp = user_path;
/* No symlinks encountered yet */
*symlinkp = 0;
/* Find starting inode according to the request message */
if((rip = find_inode(fs_dev, dir_ino)) == NULL)
return(ENOENT);
/* If dir has been removed return ENOENT. No reference has been taken yet,
 * so there is nothing to release on this path.
 */
if (rip->i_links_count == NO_LINK) return(ENOENT);
/* From here on we hold our own reference to 'rip'. */
dup_inode(rip);
/* If the given start inode is a mountpoint, we must be here because the file
 * system mounted on top returned an ELEAVEMOUNT error. In this case, we must
 * only accept ".." as the first path component.
 */
leaving_mount = rip->i_mountpoint; /* True iff rip is a mountpoint */
/* Scan the path component by component. */
while (TRUE) {
if(cp[0] == '\0') {
/* We're done; either the path was empty or we've parsed all
components of the path */
*res_inop = rip;
*offsetp += cp - user_path;
/* Return EENTERMOUNT if we are at a mount point */
if (rip->i_mountpoint) return(EENTERMOUNT);
return(OK);
}
while(cp[0] == '/') cp++;
/* Extract the next component; get_name() returns NULL and sets err_code
 * when the component is too long.
 */
next_cp = get_name(cp, component);
if (next_cp == NULL) {
put_inode(rip);
return(err_code);
}
/* Special code for '..'. A process is not allowed to leave a chrooted
 * environment. A lookup of '..' at the root of a mounted filesystem
 * has to return ELEAVEMOUNT. In both cases, the caller needs search
 * permission for the current inode, as it is used as directory.
 */
if(strcmp(component, "..") == 0) {
/* 'rip' is now accessed as directory */
if ((r = forbidden(rip, X_BIT)) != OK) {
put_inode(rip);
return(r);
}
if (rip->i_num == root_ino) {
cp = next_cp;
continue; /* Ignore the '..' at a process' root
and move on to the next component */
}
if (rip->i_num == ROOT_INODE && !rip->i_sp->s_is_root) {
/* Climbing up to parent FS. '*offsetp' points at the '..' so
 * that it is still part of the remaining path.
 */
put_inode(rip);
*offsetp += cp - user_path;
return(ELEAVEMOUNT);
}
}
/* Only check for a mount point if we are not coming from one. */
if (!leaving_mount && rip->i_mountpoint) {
/* Going to enter a child FS */
*res_inop = rip;
*offsetp += cp - user_path;
return(EENTERMOUNT);
}
/* There is more path. Keep parsing.
 * If we're leaving a mountpoint, skip directory permission checks.
 * (Passing dot2 itself makes search_dir() skip the access check.)
 */
dir_ip = rip;
rip = advance(dir_ip, leaving_mount ? dot2 : component, CHK_PERM);
/* advance() reports mount crossings through err_code while still returning
 * an inode; those are handled at the top of the next iteration, so they
 * are not errors here.
 * NOTE(review): if advance() returns NULL because get_inode() failed, this
 * code relies on err_code having been set to a non-OK value - confirm.
 */
if(err_code == ELEAVEMOUNT || err_code == EENTERMOUNT)
err_code = OK;
if (err_code != OK) {
put_inode(dir_ip);
return(err_code);
}
leaving_mount = 0;
/* The call to advance() succeeded. Fetch next component. */
if (S_ISLNK(rip->i_mode)) {
if (next_cp[0] == '\0' && (flags & PATH_RET_SYMLINK)) {
/* The link is the last component and the caller asked for
 * the link itself rather than its target.
 */
put_inode(dir_ip);
*res_inop = rip;
*offsetp += next_cp - user_path;
return(OK);
}
/* Extract path name from the symlink file */
r = ltraverse(rip, next_cp);
/* user_path now starts with the link text, so parsing restarts at
 * its beginning.
 */
next_cp = user_path;
*offsetp = 0;
/* Symloop limit reached? */
if (++(*symlinkp) > SYMLOOP_MAX)
r = ELOOP;
if (r != OK) {
put_inode(dir_ip);
put_inode(rip);
return(r);
}
if (next_cp[0] == '/') {
/* Absolute link target: resolution cannot continue from
 * here, so report ESYMLINK to the caller.
 */
put_inode(dir_ip);
put_inode(rip);
return(ESYMLINK);
}
/* Relative link target: continue from the directory that contains
 * the link. The extra dup_inode() balances the put_inode(dir_ip)
 * just after this block.
 */
put_inode(rip);
dup_inode(dir_ip);
rip = dir_ip;
}
put_inode(dir_ip);
cp = next_cp; /* Process subsequent component in next round */
}
}
/*===========================================================================*
* ltraverse *
*===========================================================================*/
PRIVATE int ltraverse(rip, suffix)
register struct inode *rip;	/* symbolic link */
char *suffix;			/* current remaining path. Has to point in the
				 * user_path buffer
				 */
{
/* Traverse a symbolic link. Copy the link text from the inode and insert
 * the text into the path. Return error code or report success. Base
 * directory has to be determined according to the first character of the
 * new pathname.
 *
 * Returns OK on success, EIO if the link's data block cannot be mapped, and
 * ENAMETOOLONG if the expanded path does not fit in user_path (user_path is
 * left untouched in that case). A data block fetched for the link text is
 * released on every return path.
 */
  block_t blink;	/* block containing link text */
  size_t llen;		/* length of link */
  size_t slen;		/* length of suffix */
  struct buf *bp = NULL; /* buffer containing link text, if one is held */
  const char *sp;	/* start of link text */
  int r = OK;

  llen = (size_t) rip->i_size;

  if (llen > MAX_FAST_SYMLINK_LENGTH) {
	/* normal symlink */
	if ((blink = read_map(rip, (off_t) 0)) == NO_BLOCK)
		return(EIO);
	bp = get_block(rip->i_dev, blink, NORMAL);
	sp = bp->b_data;
  } else {
	/* fast symlink, stored in inode */
	sp = (const char*) rip->i_block;
  }

  slen = strlen(suffix);

  /* The path we're parsing looks like this:
   * /already/processed/path/<link> or
   * /already/processed/path/<link>/not/yet/processed/path
   * After expanding the <link>, the path will look like
   * <expandedlink> or
   * <expandedlink>/not/yet/processed
   * In both cases user_path must have enough room to hold <expandedlink>.
   * However, in the latter case we have to move /not/yet/processed to the
   * right place first, before we expand <link>: the suffix has to start at
   * offset strlen(<expandedlink>), so it is moved left or right unless it is
   * already there.
   */
  if (slen > 0) { /* Do we have path after the link? */
	/* For simplicity we require that suffix starts with a slash */
	if (suffix[0] != '/') {
		panic("ltraverse: suffix does not start with a slash");
	}

	if (slen + llen + 1 > sizeof(user_path)) {
		/* <expandedlink>+suffix+\0 does not fit */
		r = ENAMETOOLONG;
	} else if ((unsigned)(suffix - user_path) != llen) {
		/* Move suffix (including its nul) left or right as needed;
		 * the regions may overlap, hence memmove.
		 */
		memmove(&user_path[llen], suffix, slen+1);
	}
  } else {
	if (llen + 1 > sizeof(user_path)) {
		/* <expandedlink> + \0 does not fit */
		r = ENAMETOOLONG;
	} else {
		/* Set terminating nul */
		user_path[llen]= '\0';
	}
  }

  /* Everything is set, now copy the expanded link to user_path */
  if (r == OK)
	memmove(user_path, sp, llen);

  /* Release the link's data block on success and on failure alike. */
  if (bp != NULL)
	put_block(bp, DIRECTORY_BLOCK);

  return(r);
}
/*===========================================================================*
* advance *
*===========================================================================*/
PUBLIC struct inode *advance(dirp, string, chk_perm)
struct inode *dirp;		/* inode for directory to be searched */
char string[NAME_MAX + 1];	/* component name to look for */
int chk_perm;			/* check permissions when string is looked up*/
{
/* Look up one path component, 'string', in directory 'dirp' and return the
 * inode it names, or NULL on failure.
 *
 * err_code is set to ENOENT for an empty component and to search_dir()'s
 * result otherwise. When an inode is returned, err_code may additionally be
 * set to ELEAVEMOUNT ('..' in the root directory of a mounted file system)
 * or EENTERMOUNT (the inode found is mounted on); the latter takes
 * precedence. err_code is not touched when 'dirp' is NULL.
 */
  ino_t entry_nr;
  struct inode *found;

  /* An empty component never names anything. */
  if (string[0] == '\0') {
	err_code = ENOENT;
	return(NULL);
  }

  /* Nothing to search in. */
  if (dirp == NULL) return(NULL);

  /* Find the directory entry; pass any failure on through err_code. */
  err_code = search_dir(dirp, string, &entry_nr, LOOK_UP, chk_perm, 0);
  if (err_code != OK) return(NULL);

  /* The entry exists; load the inode it refers to. */
  found = get_inode(dirp->i_dev, (int) entry_nr);
  if (found == NULL) return(NULL);

  /* "mountpoint/..": we searched a root directory, ended up at a root inode
   * again, and the name's second character is '.' (which tells '..' from
   * '.'). Unless this is the root file system, the lookup has to continue
   * in the parent file system.
   */
  if (found->i_num == ROOT_INODE && dirp->i_num == ROOT_INODE &&
      string[1] == '.' && !found->i_sp->s_is_root) {
	err_code = ELEAVEMOUNT;	/* Climbing up mountpoint */
  }

  /* If another file system is mounted on the inode found, tell the caller
   * so that it can switch to that file system's root directory.
   */
  if (found->i_mountpoint)
	err_code = EENTERMOUNT;

  return(found);
}
/*===========================================================================*
* get_name *
*===========================================================================*/
PRIVATE char *get_name(path_name, string)
char *path_name;		/* path name to parse */
char string[NAME_MAX+1];	/* receives the first component of 'path_name' */
{
/* Copy the first component of 'path_name' into 'string' (always nul
 * terminated) and return a pointer to the first character after that
 * component, i.e. to the part of the path that is still unparsed.
 *
 * Leading slashes are skipped, so /usr/ast, /usr//ast, //usr///ast and
 * /usr/ast/ are all equivalent. If nothing but slashes (or nothing at all)
 * is left, the component returned is ".".
 *
 * A component longer than NAME_MAX or EXT2_NAME_MAX is not truncated:
 * err_code is set to ENAMETOOLONG and NULL is returned.
 */
  char *start, *end;
  size_t n;

  start = path_name + strspn(path_name, "/");	/* past leading slashes */
  end = start + strcspn(start, "/");		/* next slash or the nul */
  n = (size_t) (end - start);

  if (n > NAME_MAX || n > EXT2_NAME_MAX) {
	err_code = ENAMETOOLONG;
	return(NULL);
  }

  if (n != 0) {
	memcpy(string, start, n);
	string[n] = '\0';
  } else {
	/* Empty component: treat it as the current directory. */
	strcpy(string, ".");
  }

  return(end);
}
/*===========================================================================*
* search_dir *
*===========================================================================*/
PUBLIC int search_dir(ldir_ptr, string, numb, flag, check_permissions, ftype)
register struct inode *ldir_ptr; /* ptr to inode for dir to search */
char string[NAME_MAX + 1];	 /* component to search for */
ino_t *numb;			 /* pointer to inode number */
int flag;			 /* LOOK_UP, ENTER, DELETE or IS_EMPTY */
int check_permissions;		 /* check permissions when flag is !IS_EMPTY */
int ftype;			 /* used when ENTER and
				  * INCOMPAT_FILETYPE */
{
/* This function searches the directory whose inode is pointed to by
 * 'ldir_ptr':
 * if (flag == ENTER)  enter 'string' in the directory with inode # '*numb';
 * if (flag == DELETE) delete 'string' from the directory;
 * if (flag == LOOK_UP) search for 'string' and return inode # in 'numb';
 * if (flag == IS_EMPTY) return OK if only . and .. in dir else ENOTEMPTY;
 *
 * If 'string' is dot1 or dot2 (compared by pointer), no access permissions
 * are checked; only a writable file system is required for ENTER/DELETE.
 *
 * Returns ENOTDIR if 'ldir_ptr' is not a directory, the error from the
 * permission check if that fails, ENOENT if LOOK_UP/DELETE finds no match,
 * err_code if the directory cannot be extended for ENTER, and OK otherwise.
 */
  register struct ext2_disk_dir_desc *dp = NULL;
  register struct ext2_disk_dir_desc *prev_dp = NULL;
  register struct buf *bp = NULL;
  int i, r, e_hit, t, match;
  mode_t bits;
  off_t pos;
  unsigned new_slots;
  block_t b;
  int extended = 0;
  int required_space = 0;
  int string_len = 0;
  off_t size_delta = 0;	/* growth of the directory when a block is added */

  /* If 'ldir_ptr' is not a pointer to a dir inode, error. */
  if ( (ldir_ptr->i_mode & I_TYPE) != I_DIRECTORY) {
	return(ENOTDIR);
  }

  r = OK;

  if (flag != IS_EMPTY) {
	bits = (flag == LOOK_UP ? X_BIT : W_BIT | X_BIT);

	if (string == dot1 || string == dot2) {
		/* only a writable device is required. */
		if (flag != LOOK_UP) r = read_only(ldir_ptr);
	} else if(check_permissions) {
		r = forbidden(ldir_ptr, bits); /* check access permissions */
	}
  }
  if (r != OK) return(r);

  new_slots = 0;
  e_hit = FALSE;
  match = 0;	/* set when a string match occurs */
  pos = 0;

  if (flag == ENTER) {
	/* Space needed for the new entry, rounded up to a multiple of 4. */
	string_len = strlen(string);
	required_space = MIN_DIR_ENTRY_SIZE + string_len;
	required_space += (required_space & 0x03) == 0 ? 0 :
			     (DIR_ENTRY_ALIGN - (required_space & 0x03) );

	/* Resume at the position remembered from an earlier call if the
	 * size recorded with it is not larger than what is needed now.
	 */
	if (ldir_ptr->i_last_dpos < ldir_ptr->i_size &&
	    ldir_ptr->i_last_dentry_size <= required_space)
		pos = ldir_ptr->i_last_dpos;
  }

  for (; pos < ldir_ptr->i_size; pos += ldir_ptr->i_sp->s_block_size) {
	b = read_map(ldir_ptr, pos);	/* get block number */

	/* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
	bp = get_block(ldir_ptr->i_dev, b, NORMAL);	/* get a dir block */
	prev_dp = NULL; /* New block - new first dentry, so no prev. */

	/* NOTE(review): NO_BLOCK is compared with a buffer pointer here;
	 * presumably it is a zero constant, making this a NULL check -
	 * confirm. The block number 'b' itself is not checked.
	 */
	if (bp == NO_BLOCK)
		panic("get_block returned NO_BLOCK");
	assert(bp != NULL);

	/* Search a directory block.
	 * Note, we set prev_dp at the end of the loop.
	 */
	for (dp = (struct ext2_disk_dir_desc*) &bp->b_data;
	     CUR_DISC_DIR_POS(dp, &bp->b_data) < ldir_ptr->i_sp->s_block_size;
	     dp = NEXT_DISC_DIR_DESC(dp) ) {
		/* Match occurs if string found. */
		if (flag != ENTER && dp->d_ino != NO_ENTRY) {
			if (flag == IS_EMPTY) {
				/* If this test succeeds, dir is not empty. */
				if (ansi_strcmp(dp->d_name, ".", dp->d_name_len) != 0 &&
				    ansi_strcmp(dp->d_name, "..", dp->d_name_len) != 0) match = 1;
			} else {
				if (ansi_strcmp(dp->d_name, string, dp->d_name_len) == 0){
					match = 1;
				}
			}
		}

		if (match) {
			/* LOOK_UP or DELETE found what it wanted. */
			r = OK;
			if (flag == IS_EMPTY) r = ENOTEMPTY;
			else if (flag == DELETE) {
				if (dp->d_name_len >= sizeof(ino_t)) {
					/* Save d_ino for recovery. */
					t = dp->d_name_len - sizeof(ino_t);
					*((ino_t *) &dp->d_name[t]) = dp->d_ino;
				}
				dp->d_ino = NO_ENTRY;	/* erase entry */
				bp->b_dirt = DIRTY;

				/* If we don't support HTree (directory index),
				 * which is fully compatible ext2 feature,
				 * we should reset EXT2_INDEX_FL, when modify
				 * linked directory structure.
				 *
				 * @TODO: actually we could just reset it for
				 * each directory, but I added if() to not
				 * forget about it later, when add HTree
				 * support.
				 */
				if (!HAS_COMPAT_FEATURE(ldir_ptr->i_sp,
							COMPAT_DIR_INDEX))
					ldir_ptr->i_flags &= ~EXT2_INDEX_FL;
				ldir_ptr->i_last_dpos = pos;
				ldir_ptr->i_last_dentry_size = conv2(le_CPU,
							dp->d_rec_len);
				ldir_ptr->i_update |= CTIME | MTIME;
				ldir_ptr->i_dirt = DIRTY;
				/* Now we have cleared dentry, if it's not
				 * the first one, merge it with previous one.
				 * Since we assume, that existing dentry must
				 * be correct, there is no way to span a data
				 * block.
				 */
				if (prev_dp) {
					u16_t temp = conv2(le_CPU,
							prev_dp->d_rec_len);
					temp += conv2(le_CPU,
							dp->d_rec_len);
					prev_dp->d_rec_len = conv2(le_CPU,
							temp);
				}
			} else {
				/* 'flag' is LOOK_UP */
				*numb = (ino_t) conv4(le_CPU, dp->d_ino);
			}
			put_block(bp, DIRECTORY_BLOCK);
			return(r);
		}

		/* Check for free slot for the benefit of ENTER. */
		if (flag == ENTER && dp->d_ino == NO_ENTRY) {
			/* we found a free slot, check if it has enough space */
			if (required_space <= conv2(le_CPU, dp->d_rec_len)) {
				e_hit = TRUE;	/* we found a free slot */
				break;
			}
		}

		/* Can we shrink dentry? */
		if (flag == ENTER && required_space <= DIR_ENTRY_SHRINK(dp)) {
			/* Shrink directory and create empty slot, now
			 * dp->d_rec_len = DIR_ENTRY_ACTUAL_SIZE + DIR_ENTRY_SHRINK.
			 */
			int new_slot_size = conv2(le_CPU, dp->d_rec_len);
			int actual_size = DIR_ENTRY_ACTUAL_SIZE(dp);
			new_slot_size -= actual_size;
			dp->d_rec_len = conv2(le_CPU, actual_size);
			dp = NEXT_DISC_DIR_DESC(dp);
			dp->d_rec_len = conv2(le_CPU, new_slot_size);
			/* if we fail before writing real ino */
			dp->d_ino = NO_ENTRY;
			bp->b_dirt = DIRTY;
			e_hit = TRUE;	/* we found a free slot */
			break;
		}

		prev_dp = dp;
	}

	/* The whole block has been searched or ENTER has a free slot. */
	if (e_hit) break;	/* e_hit set if ENTER can be performed now */
	put_block(bp, DIRECTORY_BLOCK); /* otherwise, continue searching dir */
  }

  /* The whole directory has now been searched. */
  if (flag != ENTER) {
	return(flag == IS_EMPTY ? OK : ENOENT);
  }

  /* When ENTER next time, start searching for free slot from
   * i_last_dpos. It gives solid performance improvement.
   */
  ldir_ptr->i_last_dpos = pos;
  ldir_ptr->i_last_dentry_size = required_space;

  /* This call is for ENTER. If no free slot has been found so far, try to
   * extend directory.
   */
  if (e_hit == FALSE) { /* directory is full and no room left in last block */
	new_slots++;		/* increase directory size by 1 entry */
	if ( (bp = new_block(ldir_ptr, ldir_ptr->i_size)) == NULL)
		return(err_code);
	dp = (struct ext2_disk_dir_desc*) &bp->b_data;
	dp->d_rec_len = conv2(le_CPU, ldir_ptr->i_sp->s_block_size);
	dp->d_name_len = DIR_ENTRY_MAX_NAME_LEN(dp); /* for failure */
	extended = 1;
  }

  /* 'bp' now points to a directory block with space. 'dp' points to slot. */
  dp->d_name_len = string_len;
  for (i = 0; i < NAME_MAX && i < dp->d_name_len && string[i]; i++)
	dp->d_name[i] = string[i];
  dp->d_ino = (int) conv4(le_CPU, *numb);
  if (HAS_INCOMPAT_FEATURE(ldir_ptr->i_sp, INCOMPAT_FILETYPE)) {
	/* Convert ftype (from inode.i_mode) to dp->d_file_type */
	if (ftype == I_REGULAR)
		dp->d_file_type = EXT2_FT_REG_FILE;
	else if (ftype == I_DIRECTORY)
		dp->d_file_type = EXT2_FT_DIR;
	else if (ftype == I_SYMBOLIC_LINK)
		dp->d_file_type = EXT2_FT_SYMLINK;
	else if (ftype == I_BLOCK_SPECIAL)
		dp->d_file_type = EXT2_FT_BLKDEV;
	else if (ftype == I_CHAR_SPECIAL)
		dp->d_file_type = EXT2_FT_CHRDEV;
	else if (ftype == I_NAMED_PIPE)
		dp->d_file_type = EXT2_FT_FIFO;
	else
		dp->d_file_type = EXT2_FT_UNKNOWN;
  }

  /* 'dp' points into the data of 'bp', so fetch the record length that is
   * needed below before the buffer is handed back to the cache.
   */
  if (new_slots == 1)
	size_delta = (off_t) conv2(le_CPU, dp->d_rec_len);

  bp->b_dirt = DIRTY;
  put_block(bp, DIRECTORY_BLOCK);
  ldir_ptr->i_update |= CTIME | MTIME;	/* mark mtime for update later */
  ldir_ptr->i_dirt = DIRTY;

  if (new_slots == 1) {
	ldir_ptr->i_size += size_delta;
	/* Send the change to disk if the directory is extended. */
	if (extended) rw_inode(ldir_ptr, WRITING);
  }
  return(OK);
}

154
servers/ext2/protect.c Normal file
View file

@ -0,0 +1,154 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
FORWARD _PROTOTYPE( int in_group, (gid_t grp) );
/*===========================================================================*
* fs_chmod *
*===========================================================================*/
PUBLIC int fs_chmod()
{
/* Perform the chmod(name, mode) system call: replace the permission bits
 * (ALL_MODES) of the inode named in the request with those from REQ_MODE.
 *
 * Returns EINVAL if the inode cannot be obtained and EROFS if the file
 * system is mounted read-only (the inode is left unchanged then). In every
 * case where the inode was found, its current mode is reported in RES_MODE.
 */
  register struct inode *rip;
  mode_t mode;
  int r;

  mode = (mode_t) fs_m_in.REQ_MODE;

  /* Temporarily open the file. */
  if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
	return(EINVAL);

  /* Not permitted to change the mode of a file on a read-only file sys;
   * this mirrors the check done by fs_chown().
   */
  r = read_only(rip);
  if (r == OK) {
	/* Now make the change: only the ALL_MODES bits are replaced. */
	rip->i_mode = (rip->i_mode & ~ALL_MODES) | (mode & ALL_MODES);
	rip->i_update |= CTIME;
	rip->i_dirt = DIRTY;
  }

  /* Return full (possibly unchanged) mode to caller. */
  fs_m_out.RES_MODE = rip->i_mode;

  put_inode(rip);
  return(r);
}
/*===========================================================================*
* fs_chown *
*===========================================================================*/
PUBLIC int fs_chown()
{
/* Change the owner and group of the inode named in the request to REQ_UID
 * and REQ_GID, clearing the set-uid and set-gid bits. Returns EINVAL if the
 * inode cannot be obtained and EROFS (without changing anything) if the file
 * system is read-only. The inode's current mode is reported in RES_MODE
 * whether or not the change was made.
 */
  register struct inode *ip;
  register int status;

  /* Temporarily open the file. */
  ip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (ip == NULL) return(EINVAL);

  /* Ownership cannot be changed on a read-only file system. */
  if ((status = read_only(ip)) == OK) {
	ip->i_uid = fs_m_in.REQ_UID;
	ip->i_gid = fs_m_in.REQ_GID;
	ip->i_mode &= ~(I_SET_UID_BIT | I_SET_GID_BIT);
	ip->i_dirt = DIRTY;
	ip->i_update |= CTIME;
  }

  /* The mode may have lost its set-id bits; let the caller know. */
  fs_m_out.RES_MODE = ip->i_mode;

  put_inode(ip);
  return(status);
}
/*===========================================================================*
* forbidden *
*===========================================================================*/
PUBLIC int forbidden(register struct inode *rip, mode_t access_desired)
{
/* Given a pointer to an inode, 'rip', and the access desired (a combination
 * of R_BIT, W_BIT and X_BIT), determine if the access is allowed for the
 * current caller, identified by caller_uid, caller_gid and the supplementary
 * groups in 'credentials'.
 *
 * Returns OK if access is allowed, EACCES if the permission bits forbid it,
 * and EROFS if write access is wanted on a read-only file system.
 */
  register mode_t bits, perm_bits;
  int shift;

  /* Isolate the relevant rwx bits from the mode. */
  bits = rip->i_mode;
  if (caller_uid == SU_UID) {
	/* Grant read and write permission.  Grant search permission for
	 * directories.  Grant execute permission (for non-directories) if
	 * and only if one of the 'X' bits is set.
	 */
	if ( (bits & I_TYPE) == I_DIRECTORY ||
	     bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
		perm_bits = R_BIT | W_BIT | X_BIT;
	else
		perm_bits = R_BIT | W_BIT;
  } else {
	if (caller_uid == rip->i_uid) shift = 6;		/* owner */
	else if (caller_gid == rip->i_gid) shift = 3;		/* group */
	else if (in_group(rip->i_gid) == OK) shift = 3;	/* other groups */
	else shift = 0;					/* other */
	perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
  }

  /* If access desired is not a subset of what is allowed, it is refused. */
  if ((perm_bits | access_desired) != perm_bits) return(EACCES);

  /* Check to see if someone is trying to write on a file system that is
   * mounted read-only.
   */
  if (access_desired & W_BIT) return(read_only(rip));

  return(OK);
}
/*===========================================================================*
* in_group *
*===========================================================================*/
PRIVATE int in_group(gid_t grp)
{
/* Check whether 'grp' is one of the caller's supplementary groups as stored
 * in 'credentials'. Returns OK if it is and EINVAL if it is not. A group
 * count larger than the capacity of the vu_sgroups array is treated as
 * "not a member", so the array is never read out of bounds.
 */
  int i, ngroups, max_groups;

  max_groups = (int) (sizeof(credentials.vu_sgroups) /
			sizeof(credentials.vu_sgroups[0]));
  ngroups = credentials.vu_ngroups;

  /* Do not trust a count that cannot describe the array we have. */
  if (ngroups > max_groups)
	return(EINVAL);

  for (i = 0; i < ngroups; i++)
	if (credentials.vu_sgroups[i] == grp)
		return(OK);

  return(EINVAL);
}
/*===========================================================================*
* read_only *
*===========================================================================*/
PUBLIC int read_only(ip)
struct inode *ip;		/* ptr to inode whose file sys is to be cked */
{
/* Report whether the file system holding inode 'ip' is mounted read-only:
 * EROFS if it is, OK if it is writable.
 */
  if (ip->i_sp->s_rd_only)
	return(EROFS);

  return(OK);
}

135
servers/ext2/proto.h Normal file
View file

@ -0,0 +1,135 @@
#ifndef EXT2_PROTO_H
#define EXT2_PROTO_H
/* Function prototypes for the ext2 server, grouped by the source file that
 * is expected to define them.
 */
/* Structs used in prototypes must be declared as such first. */
struct buf;
struct filp;
struct inode;
struct super_block;
/* balloc.c */
_PROTOTYPE( void discard_preallocated_blocks, (struct inode *rip) );
_PROTOTYPE( block_t alloc_block, (struct inode *rip, block_t goal) );
_PROTOTYPE( void free_block, (struct super_block *sp, bit_t bit) );
/* cache.c */
_PROTOTYPE( void buf_pool, (int bufs) );
_PROTOTYPE( void flushall, (dev_t dev) );
_PROTOTYPE( struct buf *get_block, (dev_t dev, block_t block,int only_search));
_PROTOTYPE( void invalidate, (dev_t device) );
_PROTOTYPE( void put_block, (struct buf *bp, int block_type) );
_PROTOTYPE( void set_blocksize, (unsigned int blocksize) );
_PROTOTYPE( void rw_scattered, (dev_t dev,
struct buf **bufq, int bufqsize, int rw_flag) );
/* device.c */
_PROTOTYPE( int block_dev_io, (int op, dev_t dev, endpoint_t proc_e,
void *buf, u64_t pos, size_t bytes) );
_PROTOTYPE( int dev_open, (endpoint_t driver_e, dev_t dev, endpoint_t proc_e,
int flags) );
_PROTOTYPE( void dev_close, (endpoint_t driver_e, dev_t dev) );
_PROTOTYPE( int fs_new_driver, (void) );
/* ialloc.c */
_PROTOTYPE( struct inode *alloc_inode, (struct inode *parent, mode_t bits));
_PROTOTYPE( void free_inode, (struct inode *rip) );
/* inode.c */
_PROTOTYPE( void dup_inode, (struct inode *ip) );
_PROTOTYPE( struct inode *find_inode, (dev_t dev, ino_t numb) );
_PROTOTYPE( int fs_putnode, (void) );
_PROTOTYPE( void init_inode_cache, (void) );
_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) );
_PROTOTYPE( void put_inode, (struct inode *rip) );
_PROTOTYPE( void update_times, (struct inode *rip) );
_PROTOTYPE( void rw_inode, (struct inode *rip, int rw_flag) );
/* link.c */
_PROTOTYPE( int fs_ftrunc, (void) );
_PROTOTYPE( int fs_link, (void) );
_PROTOTYPE( int fs_rdlink, (void) );
_PROTOTYPE( int fs_rename, (void) );
_PROTOTYPE( int fs_unlink, (void) );
_PROTOTYPE( int truncate_inode, (struct inode *rip, off_t len) );
/* misc.c */
_PROTOTYPE( int fs_flush, (void) );
_PROTOTYPE( int fs_sync, (void) );
/* mount.c */
_PROTOTYPE( int fs_mountpoint, (void) );
_PROTOTYPE( int fs_readsuper, (void) );
_PROTOTYPE( int fs_unmount, (void) );
/* open.c */
_PROTOTYPE( int fs_create, (void) );
_PROTOTYPE( int fs_inhibread, (void) );
_PROTOTYPE( int fs_mkdir, (void) );
_PROTOTYPE( int fs_mknod, (void) );
_PROTOTYPE( int fs_slink, (void) );
/* path.c */
_PROTOTYPE( int fs_lookup, (void) );
/* advance() returns the inode found or NULL; details are reported through
 * err_code.
 */
_PROTOTYPE( struct inode *advance, (struct inode *dirp,
char string[NAME_MAX + 1], int chk_perm));
/* search_dir(): 'flag' is one of LOOK_UP, ENTER, DELETE or IS_EMPTY. */
_PROTOTYPE( int search_dir, (struct inode *ldir_ptr,
char string [NAME_MAX + 1], ino_t *numb, int flag,
int check_permissions, int ftype) );
/* protect.c */
_PROTOTYPE( int fs_chmod, (void) );
_PROTOTYPE( int fs_chown, (void) );
/* NOTE(review): fs_getdents is listed here but protect.c does not define it;
 * presumably it lives in read.c (which declares getdents_buf) - confirm.
 */
_PROTOTYPE( int fs_getdents, (void) );
_PROTOTYPE( int forbidden, (struct inode *rip, mode_t access_desired) );
_PROTOTYPE( int read_only, (struct inode *ip) );
/* read.c */
_PROTOTYPE( int fs_breadwrite, (void) );
_PROTOTYPE( int fs_readwrite, (void) );
_PROTOTYPE( void read_ahead, (void) );
_PROTOTYPE( block_t read_map, (struct inode *rip, off_t pos) );
_PROTOTYPE( block_t rd_indir, (struct buf *bp, int index) );
/* stadir.c */
_PROTOTYPE( int fs_fstatfs, (void) );
_PROTOTYPE( int fs_stat, (void) );
_PROTOTYPE( int fs_statvfs, (void) );
/* super.c */
_PROTOTYPE( unsigned int get_block_size, (dev_t dev) );
_PROTOTYPE( struct super_block *get_super, (dev_t dev) );
_PROTOTYPE( int read_super, (struct super_block *sp) );
_PROTOTYPE( void write_super, (struct super_block *sp) );
_PROTOTYPE( struct group_desc* get_group_desc, (unsigned int bnum) );
/* time.c */
_PROTOTYPE( int fs_utime, (void) );
/* utility.c */
_PROTOTYPE( time_t clock_time, (void) );
_PROTOTYPE( unsigned conv2, (int norm, int w) );
_PROTOTYPE( long conv4, (int norm, long x) );
_PROTOTYPE( void mfs_nul_f, (char *file, int line, char *str,
unsigned int len, unsigned int maxlen) );
_PROTOTYPE( int min, (unsigned int l, unsigned int r) );
_PROTOTYPE( int no_sys, (void) );
_PROTOTYPE( void sanitycheck, (char *file, int line) );
/* Run sanitycheck() tagged with the location of the invocation. */
#define SANITYCHECK sanitycheck(__FILE__, __LINE__)
/* ansi_strcmp(): compare a name stored with an explicit length
 * ('ansi_s', 'ansi_s_length') with the C string 's2'; callers such as
 * search_dir() treat a result of 0 as "equal".
 */
_PROTOTYPE( int ansi_strcmp, (register const char* ansi_s,
register const char *s2,
register size_t ansi_s_length) );
_PROTOTYPE( bit_t setbit, (bitchunk_t *bitmap, bit_t max_bits,
unsigned int word));
_PROTOTYPE( bit_t setbyte, (bitchunk_t *bitmap, bit_t max_bits,
unsigned int word));
_PROTOTYPE( int unsetbit, (bitchunk_t *bitmap, bit_t bit) );
/* write.c */
_PROTOTYPE( struct buf *new_block, (struct inode *rip, off_t position) );
_PROTOTYPE( void zero_block, (struct buf *bp) );
_PROTOTYPE( int write_map, (struct inode *, off_t, block_t, int) );
#endif /* EXT2_PROTO_H */

685
servers/ext2/read.c Normal file
View file

@ -0,0 +1,685 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
#include <assert.h>
/* Helpers private to this file. */
FORWARD _PROTOTYPE( struct buf *rahead, (struct inode *rip, block_t baseblock,
u64_t position, unsigned bytes_ahead) );
/* rw_chunk() transfers one piece of a request that lies within a single
 * block. fs_readwrite() passes the file position, the offset within the
 * block, the chunk size, the number of bytes still to be transferred, the
 * direction, the grant, the number of bytes transferred so far (used as
 * 'buf_off'), the block size and a pointer to a completion flag.
 */
FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, u64_t position,
unsigned off, size_t chunk, unsigned left, int rw_flag,
cp_grant_id_t gid, unsigned buf_off, unsigned int block_size,
int *completed));
/* File-scope state. */
/* presumably the staging buffer used by fs_getdents - TODO confirm */
PRIVATE char getdents_buf[GETDENTS_BUFSIZ];
PRIVATE off_t rdahedpos; /* position to read ahead */
PRIVATE struct inode *rdahed_inode; /* pointer to inode to read ahead */
/*===========================================================================*
* fs_readwrite *
*===========================================================================*/
PUBLIC int fs_readwrite(void)
{
/* Serve a REQ_READ or REQ_WRITE request taken from fs_m_in: transfer up to
 * REQ_NBYTES bytes between inode REQ_INODE_NR and the grant REQ_GRANT,
 * starting at file offset REQ_SEEK_POS_LO. The transfer is cut into chunks
 * that never cross a block boundary; each chunk is handed to rw_chunk().
 *
 * On return fs_m_out.RES_NBYTES holds the number of bytes transferred and
 * fs_m_out.RES_SEEK_POS_LO the resulting file position.
 *
 * Returns OK, EINVAL when the inode is not found, EFBIG when a write to a
 * non block-special file would run past s_max_size, or the error reported by
 * rw_chunk() / left in rdwt_err.
 */
int r, rw_flag, block_spec;
int regular;
cp_grant_id_t gid;
off_t position, f_size, bytes_left;
unsigned int off, cum_io, block_size, chunk;
mode_t mode_word;
int completed;
struct inode *rip;
size_t nrbytes;
r = OK;
/* Find the inode referred */
if ((rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
return(EINVAL);
mode_word = rip->i_mode & I_TYPE;
/* Named pipes are treated like regular files for size/read-ahead purposes. */
regular = (mode_word == I_REGULAR || mode_word == I_NAMED_PIPE);
block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
/* Determine blocksize */
if (block_spec) {
/* For a block special file i_block[0] holds the device number; its size is
 * taken to be unbounded.
 */
block_size = get_block_size( (dev_t) rip->i_block[0]);
f_size = MAX_FILE_POS;
} else {
block_size = rip->i_sp->s_block_size;
f_size = rip->i_size;
/* A negative size is clamped to the largest representable position. */
if (f_size < 0) f_size = MAX_FILE_POS;
}
/* Get the values from the request message */
rw_flag = (fs_m_in.m_type == REQ_READ ? READING : WRITING);
gid = (cp_grant_id_t) fs_m_in.REQ_GRANT;
position = (off_t) fs_m_in.REQ_SEEK_POS_LO;
nrbytes = (size_t) fs_m_in.REQ_NBYTES;
rdwt_err = OK; /* set to EIO if disk error occurs */
if (rw_flag == WRITING && !block_spec) {
/* Check in advance to see if file will grow too big. */
if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
return(EFBIG);
}
cum_io = 0;
/* Split the transfer into chunks that don't span two blocks. */
while (nrbytes != 0) {
off = (unsigned int) (position % block_size);/* offset in blk*/
chunk = MIN(nrbytes, block_size - off);
if (rw_flag == READING) {
/* Never read past the end of the file: stop at EOF, shorten the last chunk. */
bytes_left = f_size - position;
if (position >= f_size) break; /* we are beyond EOF */
if (chunk > bytes_left) chunk = (int) bytes_left;
}
/* Read or write 'chunk' bytes. cum_io doubles as the offset into the
 * grant, nrbytes as the read-ahead hint.
 */
r = rw_chunk(rip, cvul64((unsigned long) position), off, chunk,
nrbytes, rw_flag, gid, cum_io, block_size, &completed);
if (r != OK) break; /* rw_chunk() failed; stop the transfer */
if (rdwt_err < 0) break; /* a lower layer recorded an error */
/* Update counters and pointers. */
nrbytes -= chunk; /* bytes yet to be read */
cum_io += chunk; /* bytes read so far */
position += (off_t) chunk; /* position within the file */
}
fs_m_out.RES_SEEK_POS_LO = position; /* It might change later and the VFS
has to know this value */
/* On write, grow the recorded file size if data was written past the old
 * end. f_size still holds the size sampled before the transfer.
 */
if (rw_flag == WRITING) {
if (regular || mode_word == I_DIRECTORY) {
if (position > f_size) rip->i_size = position;
}
}
/* Check to see if read-ahead is called for, and if so, set it up. The
 * request is only recorded here; read_ahead() carries it out.
 */
if(rw_flag == READING && rip->i_seek == NO_SEEK &&
(unsigned int) position % block_size == 0 &&
(regular || mode_word == I_DIRECTORY)) {
rdahed_inode = rip;
rdahedpos = position;
}
rip->i_seek = NO_SEEK;
if (rdwt_err != OK) r = rdwt_err; /* check for disk error */
if (rdwt_err == END_OF_FILE) r = OK; /* END_OF_FILE is not an error */
if (r == OK) {
/* Schedule the time stamp updates; reads dirty the inode too (ATIME). */
if (rw_flag == READING) rip->i_update |= ATIME;
if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
rip->i_dirt = DIRTY; /* inode is thus now dirty */
}
fs_m_out.RES_NBYTES = cum_io;
return(r);
}
/*===========================================================================*
* fs_breadwrite *
*===========================================================================*/
PUBLIC int fs_breadwrite(void)
{
int r, rw_flag, completed;
cp_grant_id_t gid;
u64_t position;
unsigned int off, cum_io, chunk, block_size;
size_t nrbytes;
/* Pseudo inode for rw_chunk */
struct inode rip;
r = OK;
/* Get the values from the request message */
rw_flag = (fs_m_in.m_type == REQ_BREAD ? READING : WRITING);
gid = (cp_grant_id_t) fs_m_in.REQ_GRANT;
position = make64((unsigned long) fs_m_in.REQ_SEEK_POS_LO,
(unsigned long) fs_m_in.REQ_SEEK_POS_HI);
nrbytes = (size_t) fs_m_in.REQ_NBYTES;
block_size = get_block_size( (dev_t) fs_m_in.REQ_DEV2);
rip.i_block[0] = (block_t) fs_m_in.REQ_DEV2;
rip.i_mode = I_BLOCK_SPECIAL;
rip.i_size = 0;
rdwt_err = OK; /* set to EIO if disk error occurs */
cum_io = 0;
/* Split the transfer into chunks that don't span two blocks. */
while (nrbytes > 0) {
off = rem64u(position, block_size); /* offset in blk*/
chunk = min(nrbytes, block_size - off);
/* Read or write 'chunk' bytes. */
r = rw_chunk(&rip, position, off, chunk, nrbytes, rw_flag, gid,
cum_io, block_size, &completed);
if (r != OK) break; /* EOF reached */
if (rdwt_err < 0) break;
/* Update counters and pointers. */
nrbytes -= chunk; /* bytes yet to be read */
cum_io += chunk; /* bytes read so far */
position = add64ul(position, chunk); /* position within the file */
}
fs_m_out.RES_SEEK_POS_LO = ex64lo(position);
fs_m_out.RES_SEEK_POS_HI = ex64hi(position);
if (rdwt_err != OK) r = rdwt_err; /* check for disk error */
if (rdwt_err == END_OF_FILE) r = OK;
fs_m_out.RES_NBYTES = cum_io;
return(r);
}
/*===========================================================================*
* rw_chunk *
*===========================================================================*/
PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, gid,
buf_off, block_size, completed)
register struct inode *rip; /* pointer to inode for file to be rd/wr */
u64_t position; /* position within file to read or write */
unsigned off; /* off within the current block */
unsigned int chunk; /* number of bytes to read or write */
unsigned left; /* max number of bytes wanted after position */
int rw_flag; /* READING or WRITING */
cp_grant_id_t gid; /* grant */
unsigned buf_off; /* offset in grant */
unsigned int block_size; /* block size of FS operating on */
int *completed; /* out: set to 0 here and not updated afterwards */
{
/* Read or write (part of) a block: locate (or, when writing, allocate) the
 * block that holds 'position', get it into a cache buffer and copy 'chunk'
 * bytes between that buffer at offset 'off' and the VFS grant 'gid' at
 * offset 'buf_off'.
 *
 * Returns the result of the safecopy, or err_code when a new block could not
 * be allocated for a write.
 */
register struct buf *bp;
register int r = OK;
int n, block_spec;
block_t b;
dev_t dev;
*completed = 0;
block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
if (block_spec) {
/* Block special file: the position maps linearly onto device blocks and
 * i_block[0] holds the device number.
 */
b = div64u(position, block_size);
dev = (dev_t) rip->i_block[0];
} else {
/* File positions must fit in 32 bits here; anything larger is fatal. */
if (ex64hi(position) != 0)
panic("rw_chunk: position too high");
b = read_map(rip, (off_t) ex64lo(position));
dev = rip->i_dev;
}
if (!block_spec && b == NO_BLOCK) {
if (rw_flag == READING) {
/* Reading from a nonexistent block. Must read as all zeros.*/
bp = get_block(NO_DEV, NO_BLOCK, NORMAL); /* get a buffer */
zero_block(bp);
} else {
/* Writing to a nonexistent block. Create and enter in inode.*/
if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
return(err_code);
}
} else if (rw_flag == READING) {
/* Read and read ahead if convenient. */
bp = rahead(rip, b, position, left);
} else {
/* Normally an existing block to be partially overwritten is first read
* in. However, a full block need not be read in. If it is already in
* the cache, acquire it, otherwise just acquire a free buffer.
*/
n = (chunk == block_size ? NO_READ : NORMAL);
/* A write that starts a block at or beyond the current end of file has no
 * old contents worth reading either.
 */
if (!block_spec && off == 0 && (off_t) ex64lo(position) >= rip->i_size)
n = NO_READ;
bp = get_block(dev, b, n);
}
/* In all cases, bp now points to a valid buffer. */
if (bp == NULL)
panic("bp not valid in rw_chunk, this can't happen");
/* A partial write into a block that starts at or beyond EOF: clear the
 * buffer first so the bytes that are not overwritten read back as zeros.
 */
if (rw_flag == WRITING && chunk != block_size && !block_spec &&
(off_t) ex64lo(position) >= rip->i_size && off == 0) {
zero_block(bp);
}
if (rw_flag == READING) {
/* Copy a chunk from the block buffer to user space. */
r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) buf_off,
(vir_bytes) (bp->b_data+off), (size_t) chunk, D);
} else {
/* Copy a chunk from user space to the block buffer. */
r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) buf_off,
(vir_bytes) (bp->b_data+off), (size_t) chunk, D);
/* NOTE(review): the buffer is marked dirty even when the copy failed. */
bp->b_dirt = DIRTY;
}
/* Tell the cache whether the transfer reached the end of the block. */
n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
put_block(bp, n);
return(r);
}
/*===========================================================================*
* read_map *
*===========================================================================*/
PUBLIC block_t read_map(rip, position)
register struct inode *rip; /* ptr to inode to map from */
off_t position; /* position in file whose blk wanted */
{
/* Given an inode and a position within the corresponding file, locate the
* block number in which that position is to be found and return it.
* NO_BLOCK is returned when the position lies beyond what triple indirection
* can address or when an indirect block on the path is missing.
*/
struct buf *bp;
int index;
block_t b;
unsigned long excess, block_pos;
/* Geometry derived from the block size, computed once on the first call.
 * NOTE(review): the values come from the first inode's superblock and are
 * reused for every later call; presumably fine because one server instance
 * handles a single file system - confirm.
 */
static char first_time = TRUE;
static long addr_in_block; /* block addresses per indirect block */
static long addr_in_block2; /* blocks reachable via one double indirect */
static long doub_ind_s; /* first file block mapped by double indirect */
static long triple_ind_s; /* first file block mapped by triple indirect */
static long out_range_s; /* first file block that cannot be mapped */
if (first_time) {
addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
addr_in_block2 = addr_in_block * addr_in_block;
doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
triple_ind_s = doub_ind_s + addr_in_block2;
out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
first_time = FALSE;
}
block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
/* Is 'position' to be found in the inode itself? */
if (block_pos < EXT2_NDIR_BLOCKS)
return(rip->i_block[block_pos]);
/* It is not in the inode, so it must be single, double or triple indirect */
if (block_pos < doub_ind_s) {
b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */
index = block_pos - EXT2_NDIR_BLOCKS;
} else if (block_pos >= out_range_s) { /* TODO: do we need it? */
return(NO_BLOCK);
} else {
/* double or triple indirect block. At first if it's triple,
* find double indirect block.
*/
excess = block_pos - doub_ind_s;
b = rip->i_block[EXT2_DIND_BLOCK];
if (block_pos >= triple_ind_s) {
b = rip->i_block[EXT2_TIND_BLOCK];
if (b == NO_BLOCK) return(NO_BLOCK);
bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
ASSERT(bp->b_dev != NO_DEV);
ASSERT(bp->b_dev == rip->i_dev);
excess = block_pos - triple_ind_s;
index = excess / addr_in_block2;
b = rd_indir(bp, index); /* num of double ind block */
put_block(bp, INDIRECT_BLOCK); /* release triple ind block */
excess = excess % addr_in_block2;
}
if (b == NO_BLOCK) return(NO_BLOCK);
bp = get_block(rip->i_dev, b, NORMAL); /* get double indirect block */
ASSERT(bp->b_dev != NO_DEV);
ASSERT(bp->b_dev == rip->i_dev);
index = excess / addr_in_block;
b = rd_indir(bp, index); /* num of single ind block */
put_block(bp, INDIRECT_BLOCK); /* release double ind block */
index = excess % addr_in_block; /* index into single ind blk */
}
/* Here b is the single indirect block and index the slot within it. */
if (b == NO_BLOCK) return(NO_BLOCK);
bp = get_block(rip->i_dev, b, NORMAL);
ASSERT(bp->b_dev != NO_DEV);
ASSERT(bp->b_dev == rip->i_dev);
b = rd_indir(bp, index);
put_block(bp, INDIRECT_BLOCK); /* release single ind block */
return(b);
}
/*===========================================================================*
* rd_indir *
*===========================================================================*/
PUBLIC block_t rd_indir(bp, index)
struct buf *bp;			/* pointer to indirect block */
int index;			/* index into *bp */
{
/* Fetch entry 'index' of the indirect block held in buffer 'bp', passing it
 * through conv4() with the le_CPU flag. A NULL buffer is a fatal error.
 */
  block_t entry;

  if (!bp) {
	panic("rd_indir() on NULL");
  }

  /* TODO: use conv call */
  entry = conv4(le_CPU, bp->b_ind[index]);
  return(entry);
}
/*===========================================================================*
* read_ahead *
*===========================================================================*/
PUBLIC void read_ahead()
{
/* Carry out the read-ahead request recorded in rdahed_inode / rdahedpos, if
 * there is one: bring the block at that position into the cache before it is
 * asked for. The request is cleared whether or not a block is read.
 */
  struct inode *ino;
  struct buf *buf;
  block_t blk;
  unsigned int bsize;

  if (rdahed_inode == NULL)
	return;				/* nothing pending */

  ino = rdahed_inode;			/* inode to read ahead from */
  bsize = get_block_size(ino->i_dev);
  rdahed_inode = NULL;			/* turn off read ahead */

  blk = read_map(ino, rdahedpos);
  if (blk == NO_BLOCK)
	return;				/* at EOF */

  assert(rdahedpos > 0);  /* So we can safely cast it to unsigned below */

  buf = rahead(ino, blk, cvul64((unsigned long) rdahedpos), bsize);
  put_block(buf, PARTIAL_DATA_BLOCK);
}
/*===========================================================================*
* rahead *
*===========================================================================*/
PRIVATE struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip; /* pointer to inode for file to be read */
block_t baseblock; /* block at current position */
u64_t position; /* position within file */
unsigned bytes_ahead; /* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device. If a physical read is
* required, prefetch as many more blocks as convenient into the cache.
* This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
* The device driver may decide it knows better and stop reading at a
* cylinder boundary (or after an error). Rw_scattered() puts an optional
* flag on all reads to allow this.
*
* Returns the buffer holding 'baseblock'.
*/
/* Minimum number of blocks to prefetch. */
# define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
int block_spec, read_q_size;
unsigned int blocks_ahead, fragment, block_size;
block_t block, blocks_left;
off_t ind1_pos;
dev_t dev;
struct buf *bp;
/* Queue of buffers handed to rw_scattered(); allocated on first use and
 * reallocated whenever nr_bufs differs from the size it was allocated with.
 */
static unsigned int readqsize = 0;
static struct buf **read_q;
if(readqsize != nr_bufs) {
if(readqsize > 0) {
assert(read_q != NULL);
free(read_q);
}
if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
panic("couldn't allocate read_q");
readqsize = nr_bufs;
}
block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
/* For a block special file i_block[0] holds the device number. */
if (block_spec)
dev = (dev_t) rip->i_block[0];
else
dev = rip->i_dev;
block_size = get_block_size(dev);
block = baseblock;
/* PREFETCH only reserves the buffer; if its b_dev is already set the block
 * was found in the cache and no device read is needed.
 */
bp = get_block(dev, block, PREFETCH);
if (bp->b_dev != NO_DEV) return(bp);
/* The best guess for the number of blocks to prefetch: A lot.
* It is impossible to tell what the device looks like, so we don't even
* try to guess the geometry, but leave it to the driver.
*
* The floppy driver can read a full track with no rotational delay, and it
* avoids reading partial tracks if it can, so handing it enough buffers to
* read two tracks is perfect. (Two, because some diskette types have
* an odd number of sectors per track, so a block may span tracks.)
*
* The disk drivers don't try to be smart. With todays disks it is
* impossible to tell what the real geometry looks like, so it is best to
* read as much as you can. With luck the caching on the drive allows
* for a little time to start the next read.
*
* The current solution below is a bit of a hack, it just reads blocks from
* the current file position hoping that more of the file can be found. A
* better solution must look at the already available
* indirect blocks (but don't call read_map!).
*/
/* Round the position down to a block boundary and count the bytes skipped
 * as wanted, then convert the byte count into whole blocks.
 */
fragment = rem64u(position, block_size);
position = sub64u(position, fragment);
bytes_ahead += fragment;
blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
if (block_spec && rip->i_size == 0) {
/* Size unknown for this block special file: allow a full request. */
blocks_left = (block_t) NR_IOREQS;
} else {
blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
block_size;
/* Go for the first indirect block if we are in its neighborhood. */
if (!block_spec) {
ind1_pos = (EXT2_NDIR_BLOCKS) * block_size;
if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
blocks_ahead++;
blocks_left++;
}
}
}
/* No more than the maximum request. */
if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
/* Read at least the minimum number of blocks, but not after a seek. */
if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
blocks_ahead = BLOCKS_MINIMUM;
/* Can't go past end of file. */
if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
read_q_size = 0;
/* Acquire block buffers.
 * NOTE(review): blocks_ahead is unsigned; if it is 0 at this point (e.g.
 * blocks_left == 0) the pre-decrement below wraps around instead of ending
 * the loop, leaving only the other two exits - confirm callers rule this out.
 */
for (;;) {
read_q[read_q_size++] = bp;
if (--blocks_ahead == 0) break;
/* Don't trash the cache, leave 4 free. */
if (bufs_in_use >= nr_bufs - 4) break;
block++;
bp = get_block(dev, block, PREFETCH);
if (bp->b_dev != NO_DEV) {
/* Oops, block already in the cache, get out. */
put_block(bp, FULL_DATA_BLOCK);
break;
}
}
/* Issue the gathered reads, then fetch the requested block itself in the
 * normal way.
 */
rw_scattered(dev, read_q, read_q_size, READING);
return(get_block(dev, baseblock, NORMAL));
}
/*===========================================================================*
* fs_getdents *
*===========================================================================*/
PUBLIC int fs_getdents(void)
{
register struct inode *rip;
int o, r, done;
unsigned int block_size, len, reclen;
ino_t ino;
block_t b;
cp_grant_id_t gid;
size_t size, tmpbuf_off, userbuf_off;
off_t pos, off, block_pos, new_pos, ent_pos;
struct buf *bp;
struct ext2_disk_dir_desc *d_desc;
struct dirent *dep;
char *cp;
ino = (ino_t) fs_m_in.REQ_INODE_NR;
gid = (gid_t) fs_m_in.REQ_GRANT;
size = (size_t) fs_m_in.REQ_MEM_SIZE;
pos = (off_t) fs_m_in.REQ_SEEK_POS_LO;
/* Check whether the position is properly aligned */
if ((unsigned int) pos % DIR_ENTRY_ALIGN)
return(ENOENT);
if ((rip = get_inode(fs_dev, ino)) == NULL)
return(EINVAL);
block_size = rip->i_sp->s_block_size;
off = (pos % block_size); /* Offset in block */
block_pos = pos - off;
done = FALSE; /* Stop processing directory blocks when done is set */
memset(getdents_buf, '\0', GETDENTS_BUFSIZ); /* Avoid leaking any data */
tmpbuf_off = 0; /* Offset in getdents_buf */
userbuf_off = 0; /* Offset in the user's buffer */
/* The default position for the next request is EOF. If the user's buffer
* fills up before EOF, new_pos will be modified. */
new_pos = rip->i_size;
for (; block_pos < rip->i_size; block_pos += block_size) {
off_t temp_pos = block_pos;
b = read_map(rip, block_pos); /* get block number */
/* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
bp = get_block(rip->i_dev, b, NORMAL); /* get a dir block */
if (bp == NO_BLOCK)
panic("get_block returned NO_BLOCK");
assert(bp != NULL);
/* Search a directory block. */
d_desc = (struct ext2_disk_dir_desc*) &bp->b_data;
/* we need to seek to entry at off bytes.
* when NEXT_DISC_DIR_POS == block_size it's last dentry.
*/
for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos
&& NEXT_DISC_DIR_POS(d_desc, &bp->b_data) < block_size;
d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
temp_pos += conv2(le_CPU, d_desc->d_rec_len);
}
for (; CUR_DISC_DIR_POS(d_desc, &bp->b_data) < block_size;
d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
if (d_desc->d_ino == 0)
continue; /* Entry is not in use */
if (d_desc->d_name_len > NAME_MAX ||
d_desc->d_name_len > EXT2_NAME_MAX) {
len = min(NAME_MAX, EXT2_NAME_MAX);
} else {
len = d_desc->d_name_len;
}
/* Compute record length */
reclen = offsetof(struct dirent, d_name) + len + 1;
o = (reclen % sizeof(long));
if (o != 0)
reclen += sizeof(long) - o;
/* Need the position of this entry in the directory */
ent_pos = block_pos + ((char *)d_desc - bp->b_data);
if (tmpbuf_off + reclen > GETDENTS_BUFSIZ) {
r = sys_safecopyto(VFS_PROC_NR, gid,
(vir_bytes) userbuf_off,
(vir_bytes) getdents_buf,
(size_t) tmpbuf_off, D);
if (r != OK) {
put_inode(rip);
return(r);
}
userbuf_off += tmpbuf_off;
tmpbuf_off = 0;
}
if (userbuf_off + tmpbuf_off + reclen > size) {
/* The user has no space for one more record */
done = TRUE;
/* Record the position of this entry, it is the
* starting point of the next request (unless the
* position is modified with lseek).
*/
new_pos = ent_pos;
break;
}
dep = (struct dirent *) &getdents_buf[tmpbuf_off];
dep->d_ino = conv4(le_CPU, d_desc->d_ino);
dep->d_off = ent_pos;
dep->d_reclen = (unsigned short) reclen;
memcpy(dep->d_name, d_desc->d_name, len);
dep->d_name[len] = '\0';
tmpbuf_off += reclen;
}
put_block(bp, DIRECTORY_BLOCK);
if (done)
break;
}
if (tmpbuf_off != 0) {
r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) userbuf_off,
(vir_bytes) getdents_buf, (size_t) tmpbuf_off, D);
if (r != OK) {
put_inode(rip);
return(r);
}
userbuf_off += tmpbuf_off;
}
if (done && userbuf_off == 0)
r = EINVAL; /* The user's buffer is too small */
else {
fs_m_out.RES_NBYTES = userbuf_off;
fs_m_out.RES_SEEK_POS_LO = new_pos;
rip->i_update |= ATIME;
rip->i_dirt = DIRTY;
r = OK;
}
put_inode(rip); /* release the inode */
return(r);
}

125
servers/ext2/stadir.c Normal file
View file

@ -0,0 +1,125 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/statvfs.h>
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>
/*===========================================================================*
* stat_inode *
*===========================================================================*/
PRIVATE int stat_inode(
  register struct inode *rip,	/* pointer to inode to stat */
  endpoint_t who_e,		/* Caller endpoint */
  cp_grant_id_t gid		/* grant for the stat buf */
)
{
/* Build a struct stat from the in-core inode 'rip' and copy it to offset 0 of
 * grant 'gid' owned by 'who_e'. Pending time stamp updates are applied to the
 * inode first. Returns the result of the copy.
 */
  struct stat sb;
  mode_t file_type;
  int is_special;

  /* Bring atime, ctime and mtime up to date if an update is pending. */
  if (rip->i_update) update_times(rip);

  file_type = rip->i_mode & I_TYPE;
  is_special = (file_type == I_CHAR_SPECIAL || file_type == I_BLOCK_SPECIAL);

  sb.st_dev = rip->i_dev;
  sb.st_ino = rip->i_num;
  sb.st_mode = rip->i_mode;
  sb.st_nlink = rip->i_links_count;
  sb.st_uid = rip->i_uid;
  sb.st_gid = rip->i_gid;

  /* Only device special files carry a device number (in i_block[0]). */
  if (is_special)
	sb.st_rdev = rip->i_block[0];
  else
	sb.st_rdev = NO_DEV;

  sb.st_size = rip->i_size;
  sb.st_atime = rip->i_atime;
  sb.st_mtime = rip->i_mtime;
  sb.st_ctime = rip->i_ctime;

  /* Hand the result to the caller. */
  return(sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &sb,
	(size_t) sizeof(sb), D));
}
/*===========================================================================*
* fs_fstatfs *
*===========================================================================*/
PUBLIC int fs_fstatfs()
{
struct statfs st;
struct inode *rip;
int r;
if((rip = find_inode(fs_dev, ROOT_INODE)) == NULL)
return(EINVAL);
st.f_bsize = rip->i_sp->s_block_size;
/* Copy the struct to user space. */
r = sys_safecopyto(fs_m_in.m_source, (cp_grant_id_t) fs_m_in.REQ_GRANT,
(vir_bytes) 0, (vir_bytes) &st, (size_t) sizeof(st), D);
return(r);
}
/*===========================================================================*
* fs_stat *
*===========================================================================*/
PUBLIC int fs_stat()
{
/* Serve a stat request for inode REQ_INODE_NR: the inode is acquired, its
 * attributes are copied to the grant REQ_GRANT by stat_inode(), and the inode
 * is released again. Returns EINVAL when the inode cannot be obtained,
 * otherwise the result of stat_inode().
 */
  struct inode *target;
  int result;

  target = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (target == NULL)
	return(EINVAL);

  result = stat_inode(target, fs_m_in.m_source,
	(cp_grant_id_t) fs_m_in.REQ_GRANT);

  put_inode(target);		/* release the inode */
  return(result);
}
/*===========================================================================*
* fs_statvfs *
*===========================================================================*/
PUBLIC int fs_statvfs()
{
/* Serve a statvfs request: fill a struct statvfs from the superblock of
 * fs_dev and copy it to the grant REQ_GRANT of the requesting process.
 * Returns the result of the copy.
 */
  struct statvfs st;
  struct super_block *sp;
  int r;

  sp = get_super(fs_dev);

  st.f_bsize =  sp->s_block_size;
  st.f_frsize = sp->s_block_size;
  st.f_blocks = sp->s_blocks_count;
  st.f_bfree = sp->s_free_blocks_count;

  /* Blocks available to unprivileged users exclude the reserved ones. When
   * fewer blocks are free than are reserved the difference must not wrap
   * around to a huge value: nothing is available in that case.
   */
  if (sp->s_free_blocks_count > sp->s_r_blocks_count)
	st.f_bavail = sp->s_free_blocks_count - sp->s_r_blocks_count;
  else
	st.f_bavail = 0;

  st.f_files = sp->s_inodes_count;
  st.f_ffree = sp->s_free_inodes_count;
  st.f_favail = sp->s_free_inodes_count;
  st.f_fsid = fs_dev;
  st.f_flag = (sp->s_rd_only == 1 ? ST_RDONLY : 0);
  st.f_flag |= ST_NOTRUNC;
  st.f_namemax = NAME_MAX;

  /* Copy the struct to user space. */
  r = sys_safecopyto(fs_m_in.m_source, fs_m_in.REQ_GRANT, 0, (vir_bytes) &st,
	(phys_bytes) sizeof(st), D);

  return(r);
}

446
servers/ext2/super.c Normal file
View file

@ -0,0 +1,446 @@
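/* This file manages the super block structure.
*
* The entry points into this file are
* get_super: search the 'superblock' table for a device
* get_block_size: return the block size used for a device
* read_super: read a superblock
* write_super: write a superblock and the group descriptor table
* get_group_desc: return the descriptor of a block group
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/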
#include "fs.h"
#include <string.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include "const.h"
/* Helpers private to this file. */
FORWARD _PROTOTYPE( off_t ext2_max_size, (int block_size) );
FORWARD _PROTOTYPE( u32_t ext2_count_dirs, (struct super_block *sp) );
FORWARD _PROTOTYPE( void super_copy, (register struct super_block *dest,
register struct super_block *source));
FORWARD _PROTOTYPE( void copy_group_descriptors,
(register struct group_desc *dest_array,
register struct group_desc *source_array,
unsigned int ngroups));
/* Byte offset of the superblock on the device; set by read_super() and
 * reused by write_super().
 */
PRIVATE off_t super_block_offset;
/*===========================================================================*
* get_super *
*===========================================================================*/
PUBLIC struct super_block *get_super(
  dev_t dev			/* device number whose super_block is sought */
)
{
/* Return the in-core superblock for device 'dev'. Only one superblock is
 * kept (the global 'superblock'), so a request for NO_DEV or for any device
 * other than the one it belongs to is a fatal error.
 */
  if (dev == NO_DEV)
	panic("request for super_block of NO_DEV");

  /* Include the offending device in the message; without a conversion
   * specifier the extra argument was silently dropped.
   */
  if (superblock->s_dev != dev)
	panic("wrong superblock: %d", (int) dev);

  return(superblock);
}
/*===========================================================================*
* get_block_size *
*===========================================================================*/
PUBLIC unsigned int get_block_size(dev_t dev)
{
/* Return the block size to use for device 'dev'. The device is only checked
 * against NO_DEV (a fatal error); the value returned is the global
 * fs_block_size whatever 'dev' is.
 */
  if (NO_DEV == dev) {
	panic("request for block size of NO_DEV");
  }

  return fs_block_size;
}
/* Buffer holding the group descriptor table in its on-disk form: filled from
 * the device by read_super() and used as the staging area by write_super().
 */
PRIVATE struct group_desc *ondisk_group_descs;
/*===========================================================================*
* read_super *
*===========================================================================*/
PUBLIC int read_super(sp)
register struct super_block *sp; /* pointer to a superblock */
{
/* Read the superblock and the group descriptor table of device sp->s_dev
 * into memory, validate the basic geometry and fill in the derived fields of
 * 'sp'. Returns OK on success and EINVAL when the device cannot be read or
 * does not hold a usable ext2 file system; in the latter case sp->s_dev may
 * be left set to NO_DEV.
 */
  dev_t dev;
  int r;
  /* group descriptors, sp->s_group_desc points to this. */
  static struct group_desc *group_descs;
  char *buf;
  block_t gd_size;		/* group descriptors table size in bytes */
  int gdt_position;

  /* Check the pointer before it is dereferenced below. */
  if (!sp)
	panic("can't allocate memory for super_block buffers");

  dev = sp->s_dev;		/* save device (will be overwritten by copy) */
  if (dev == NO_DEV)
	panic("request for super_block of NO_DEV");

  if (opt.block_with_super == 0) {
	super_block_offset = SUPER_BLOCK_BYTES;
  } else {
	/* The block number here uses 1k units */
	super_block_offset = opt.block_with_super * 1024;
  }

  STATICINIT(ondisk_superblock, sizeof(struct super_block));

  if (!ondisk_superblock)
	panic("can't allocate memory for super_block buffers");

  r = block_dev_io(MFS_DEV_READ, dev, SELF_E,
	(char*) ondisk_superblock, cvu64(super_block_offset),
	_MIN_BLOCK_SIZE);

  if (r != _MIN_BLOCK_SIZE)
	return(EINVAL);

  super_copy(sp, ondisk_superblock);

  sp->s_dev = NO_DEV;		/* restore later */

  if (sp->s_magic != SUPER_MAGIC)
	return(EINVAL);

  /* Reject absurd shift counts up front: shifting an int by its width or
   * more is undefined and could make a corrupt superblock look valid.
   * 1024 << 20 is the largest block size that still fits in a signed int.
   */
  if (sp->s_log_block_size > 20)
	return(EINVAL);

  sp->s_block_size = 1024*(1<<sp->s_log_block_size);

  if (sp->s_block_size < _MIN_BLOCK_SIZE
      || sp->s_block_size >_MAX_BLOCK_SIZE) {
	/* Report before returning; the message used to be unreachable. */
	printf("data block size is out of the supported range\n");
	return(EINVAL);
  }

  if ((sp->s_block_size % 512) != 0)
	return(EINVAL);

  if (SUPER_SIZE_D > sp->s_block_size)
	return(EINVAL);

  /* Variable added for convinience (i_blocks counts 512-byte blocks). */
  sp->s_sectors_in_block = sp->s_block_size / 512;

  /* TODO: this code is for revision 1 (but bw compatible with 0)
   * inode must be power of 2 and smaller, than block size.
   * The parentheses around the '&' matter: '!=' binds tighter than '&'.
   * A zero inode size is rejected too, as it is used as a divisor below.
   */
  if (EXT2_INODE_SIZE(sp) == 0
      || (EXT2_INODE_SIZE(sp) & (EXT2_INODE_SIZE(sp) - 1)) != 0
      || EXT2_INODE_SIZE(sp) > sp->s_block_size) {
	printf("superblock->s_inode_size is incorrect...\n");
	return(EINVAL);
  }

  sp->s_blocksize_bits = sp->s_log_block_size + 10;
  sp->s_max_size = ext2_max_size(sp->s_block_size);
  sp->s_inodes_per_block = sp->s_block_size / EXT2_INODE_SIZE(sp);
  if (sp->s_inodes_per_block == 0 || sp->s_inodes_per_group == 0) {
	printf("either inodes_per_block or inodes_per_group count is 0\n");
	return(EINVAL);
  }

  /* s_blocks_per_group is a divisor in the group count below. */
  if (sp->s_blocks_per_group == 0) {
	printf("blocks_per_group count is 0\n");
	return(EINVAL);
  }

  sp->s_itb_per_group = sp->s_inodes_per_group / sp->s_inodes_per_block;
  sp->s_desc_per_block = sp->s_block_size / sizeof(struct group_desc);

  sp->s_groups_count = ((sp->s_blocks_count - sp->s_first_data_block - 1)
	/ sp->s_blocks_per_group) + 1;

  /* ceil(groups_count/desc_per_block) */
  sp->s_gdb_count = (sp->s_groups_count + sp->s_desc_per_block - 1)
	/ sp->s_desc_per_block;

  gd_size = sp->s_gdb_count * sp->s_block_size;

  /* One buffer for the in-core descriptors, one for the on-disk form. */
  buf = 0;
  STATICINIT(buf, gd_size);
  group_descs = (struct group_desc *) buf;

  buf = 0;
  STATICINIT(buf, gd_size);
  ondisk_group_descs = (struct group_desc *) buf;

  if (!group_descs || !ondisk_group_descs)
	panic("can't allocate memory for gdt buffer");

  /* s_first_data_block (block number, where superblock is stored)
   * is 1 for 1Kb blocks and 0 for larger blocks.
   * For fs with 1024-byte blocks first 1024 bytes (block0) used by MBR,
   * and block1 stores superblock. When block size is larger, block0 stores
   * both MBR and superblock, but gdt lives in next block anyway.
   * If sb=N was specified, then gdt is stored in N+1 block, the block number
   * here uses 1k units.
   *
   */
  if (opt.block_with_super == 0) {
	gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size;
  } else {
	gdt_position = (opt.block_with_super + 1) * 1024;
  }

  r = block_dev_io(MFS_DEV_READ, dev, SELF_E,
	(char*) ondisk_group_descs, cvu64(gdt_position),
	gd_size);
  if (r != gd_size) {
	printf("Can not read group descriptors\n");
	return(EINVAL);
  }

  /* TODO: check descriptors we just read */

  copy_group_descriptors(group_descs, ondisk_group_descs, sp->s_groups_count);
  sp->s_group_desc = group_descs;

  /* Make a few basic checks to see if super block looks reasonable. */
  if (sp->s_inodes_count < 1 || sp->s_blocks_count < 1) {
	printf("not enough inodes or data blocks, \n");
	return(EINVAL);
  }

  sp->s_dirs_counter = ext2_count_dirs(sp);

  /* Start block search from this block.
   * We skip superblock (1 block), group descriptors blocks (sp->s_gdb_count)
   * block and inode bitmaps (2 blocks) and inode table.
   */
  sp->s_bsearch = sp->s_first_data_block + 1 + sp->s_gdb_count + 2
	+ sp->s_itb_per_group;

  sp->s_igsearch = 0;

  sp->s_dev = dev;		/* restore device number */
  return(OK);
}
/*===========================================================================*
* write_super *
*===========================================================================*/
PUBLIC void write_super(sp)
struct super_block *sp; /* pointer to a superblock */
{
/* Write the superblock 'sp' back to its device and, when the group
 * descriptors are marked dirty, the group descriptor table as well. Writing
 * a read-only file system or a superblock without a device is a fatal error;
 * I/O failures are only reported with a message.
 */
  int r;
  block_t gd_size;		/* group descriptors table size in bytes */
  int gdt_position;

  if (sp->s_rd_only)
	panic("can't write superblock on read-only filesys.");

  if (sp->s_dev == NO_DEV)
	panic("request to write super_block, but NO_DEV");

  /* Convert into the on-disk buffer and write THAT buffer. Writing 'sp'
   * itself would throw the conversion away and put the in-core
   * representation on disk.
   */
  super_copy(ondisk_superblock, sp);

  r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E,
	(char*) ondisk_superblock, cvu64(super_block_offset),
	SUPER_SIZE_D);
  if (r != SUPER_SIZE_D)
	printf("ext2: Warning, failed to write superblock to the disk!\n");

  if (group_descriptors_dirty == DIRTY) {
	/* Locate the group descriptor table; same rule as in read_super(). */
	gd_size = sp->s_gdb_count * sp->s_block_size;

	if (opt.block_with_super == 0) {
		gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size;
	} else {
		gdt_position = (opt.block_with_super + 1) * 1024;
	}

	copy_group_descriptors(ondisk_group_descs, sp->s_group_desc,
			       sp->s_groups_count);

	r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E,
		(char*) ondisk_group_descs, cvu64(gdt_position),
		gd_size);
	if (r != gd_size) {
		printf("Can not write group descriptors\n");
	}
	group_descriptors_dirty = CLEAN;
  }
}
/*===========================================================================*
* get_group_desc *
*===========================================================================*/
struct group_desc* get_group_desc(unsigned int bnum)
{
/* Return a pointer to the in-core descriptor of block group 'bnum' of the
 * global superblock, or NULL (after printing a message) when 'bnum' is not a
 * valid group number.
 */
  if (bnum >= superblock->s_groups_count) {
	/* bnum is unsigned, so it must be printed with %u */
	printf("ext2, get_group_desc: wrong bnum (%u) requested\n", bnum);
	return NULL;
  }
  return &superblock->s_group_desc[bnum];
}
PRIVATE u32_t ext2_count_dirs(struct super_block *sp)
{
/* Return the number of directories on the file system described by 'sp',
 * i.e. the sum of used_dirs_count over all of its group descriptors.
 * The descriptors are read through 'sp' itself, so the result does not
 * depend on the global superblock being the same object.
 */
  u32_t count = 0;
  unsigned int i;		/* unsigned, like the group count */

  for (i = 0; i < sp->s_groups_count; i++)
	count += sp->s_group_desc[i].used_dirs_count;

  return count;
}
/*===========================================================================*
* ext2_max_size *
*===========================================================================*/
/* Return the maximum file size, in bytes, for the given block size.
 *
 * Several things limit the size of a file:
 * - inode.i_blocks (counted in 512-byte blocks) is limited to (2^32 - 1).
 * - the number of blocks addressable through the direct, single, double and
 *   triple indirect blocks.
 * The number of addressable blocks depends on the block size only, so unlike
 * Linux (ext2_max_size) no calculation is done here; constants are used for
 * the supported block sizes. The calculation itself is kept below under
 * '#if 0' for reference.
 * Note: Linux' ext2_max_size calculates with shifts, not arithmetic.
 * (!!!) Note: the constants are tightly tied to EXT2_NDIR_BLOCKS, but it is
 * unlikely that its value will ever change. If it does, recalculate the
 * constants; the function panics rather than return stale values.
 * Note: VFS imposes a limit of its own (LONG_MAX, i.e. 2GB).
 */
PRIVATE off_t ext2_max_size(int block_size)
{
/* 12 is EXT2_NDIR_BLOCKS used in calculations. */
if (EXT2_NDIR_BLOCKS != 12)
panic("ext2_max_size needs modification!");
/* For every supported block size the real limit (shown in the comments)
 * exceeds LONG_MAX, so the result is capped at LONG_MAX.
 */
switch(block_size) {
case 1024: return LONG_MAX; /* actually 17247252480 */
case 2048: return LONG_MAX; /* 275415851008 */
case 4096: return LONG_MAX; /* 2194719883264 */
default: {
ext2_debug("ext2_max_size: Unsupported block_size! \
Assuming bs is 1024 bytes\n");
/* (12 + 256 + 256*256) * 1024: what direct, single and double indirect
 * blocks address with 1024-byte blocks.
 */
return 67383296L;
}
}
/* Reference calculation, not compiled. */
#if 0
long addr_in_block = block_size/4; /* 4 bytes per addr */
long sectors_in_block = block_size/512;
long long meta_blocks; /* single, double and triple indirect blocks */
unsigned long long out_range_s; /* max blocks addressed by inode */
unsigned long long max_bytes;
unsigned long long upper_limit;
/* 1 indirect block, 1 + addr_in_block dindirect and 1 + addr_in_block +
* + addr_in_block*addr_in_block triple indirect blocks */
meta_blocks = 2*addr_in_block + addr_in_block*addr_in_block + 3;
out_range_s = EXT2_NDIR_BLOCKS + addr_in_block + addr_in_block * addr_in_block
+ addr_in_block * addr_in_block * addr_in_block;
max_bytes = out_range_s * block_size;
upper_limit = (1LL << 32) - 1; /* max 512-byte blocks by i_blocks */
upper_limit /= sectors_in_block; /* total block_size blocks */
upper_limit -= meta_blocks; /* total data blocks */
upper_limit *= (long long)block_size; /* max size in bytes */
if (max_bytes > upper_limit)
max_bytes = upper_limit;
/* Limit s_max_size to LONG_MAX */
if (max_bytes > LONG_MAX)
max_bytes = LONG_MAX;
return max_bytes;
#endif
}
/*===========================================================================*
* super_copy *
*===========================================================================*/
PRIVATE void super_copy(
  register struct super_block *dest,
  register struct super_block *source
)
{
/* Copy a super block from 'source' to 'dest'.
 * When le_CPU is set, the first SUPER_SIZE_D bytes are copied verbatim.
 * Otherwise every multi-byte field up to s_padding1 is passed through
 * conv2()/conv4(); byte arrays and single-byte fields are copied as they are.
 * Note: we don't convert stuff, used in ext3 (the journaling fields and
 * everything after them are left untouched in that case).
 */
#define SB_CONV4(field)	dest->field = conv4(le_CPU, source->field)
#define SB_CONV2(field)	dest->field = conv2(le_CPU, source->field)

  if (le_CPU) {
    /* Just use memcpy */
    memcpy(dest, source, SUPER_SIZE_D);
    return;
  }

  SB_CONV4(s_inodes_count);
  SB_CONV4(s_blocks_count);
  SB_CONV4(s_r_blocks_count);
  SB_CONV4(s_free_blocks_count);
  SB_CONV4(s_free_inodes_count);
  SB_CONV4(s_first_data_block);
  SB_CONV4(s_log_block_size);
  SB_CONV4(s_log_frag_size);
  SB_CONV4(s_blocks_per_group);
  SB_CONV4(s_frags_per_group);
  SB_CONV4(s_inodes_per_group);
  SB_CONV4(s_mtime);
  SB_CONV4(s_wtime);

  SB_CONV2(s_mnt_count);
  SB_CONV2(s_max_mnt_count);
  SB_CONV2(s_magic);
  SB_CONV2(s_state);
  SB_CONV2(s_errors);
  SB_CONV2(s_minor_rev_level);

  SB_CONV4(s_lastcheck);
  SB_CONV4(s_checkinterval);
  SB_CONV4(s_creator_os);
  SB_CONV4(s_rev_level);

  SB_CONV2(s_def_resuid);
  SB_CONV2(s_def_resgid);

  SB_CONV4(s_first_ino);
  SB_CONV2(s_inode_size);
  SB_CONV2(s_block_group_nr);
  SB_CONV4(s_feature_compat);
  SB_CONV4(s_feature_incompat);
  SB_CONV4(s_feature_ro_compat);

  /* Byte arrays have no byte order. */
  memcpy(dest->s_uuid, source->s_uuid, sizeof(dest->s_uuid));
  memcpy(dest->s_volume_name, source->s_volume_name,
	sizeof(dest->s_volume_name));
  memcpy(dest->s_last_mounted, source->s_last_mounted,
	sizeof(dest->s_last_mounted));

  SB_CONV4(s_algorithm_usage_bitmap);

  /* Single bytes need no conversion either. */
  dest->s_prealloc_blocks = source->s_prealloc_blocks;
  dest->s_prealloc_dir_blocks = source->s_prealloc_dir_blocks;

  SB_CONV2(s_padding1);

#undef SB_CONV2
#undef SB_CONV4
}
/*===========================================================================*
* gd_copy *
*===========================================================================*/
PRIVATE void gd_copy(
  register struct group_desc *dest,
  register struct group_desc *source
)
{
/* Copy one group descriptor from 'source' to 'dest', swapping bytes if need
 * be. With le_CPU set the whole structure is copied verbatim; otherwise the
 * six bitmap/table/counter fields are converted one by one and the 'pad' and
 * 'reserved' members of 'dest' are not written.
 */
  if (!le_CPU) {
    dest->block_bitmap = conv4(le_CPU, source->block_bitmap);
    dest->inode_bitmap = conv4(le_CPU, source->inode_bitmap);
    dest->inode_table = conv4(le_CPU, source->inode_table);
    dest->free_blocks_count = conv2(le_CPU, source->free_blocks_count);
    dest->free_inodes_count = conv2(le_CPU, source->free_inodes_count);
    dest->used_dirs_count = conv2(le_CPU, source->used_dirs_count);
  } else {
    /* Same byte order on disk and in core: a plain copy will do. */
    memcpy(dest, source, sizeof(struct group_desc));
  }
}
/*===========================================================================*
* copy_group_descriptors *
*===========================================================================*/
PRIVATE void copy_group_descriptors(
  register struct group_desc *dest_array,
  register struct group_desc *source_array,
  unsigned int ngroups
)
{
/* Run gd_copy() over the first 'ngroups' elements of 'source_array', storing
 * each result in the element of 'dest_array' with the same index.
 */
  int idx = 0;

  while (idx < ngroups) {
    gd_copy(dest_array + idx, source_array + idx);
    idx++;
  }
}

128
servers/ext2/super.h Normal file
View file

@ -0,0 +1,128 @@
/* Super block table. The root file system and every mounted file system
* has an entry here. The entry holds information about the sizes of the bit
* maps and inodes.
*
* A super_block slot is free if s_dev == NO_DEV.
*
*/
#ifndef EXT2_SUPER_H
#define EXT2_SUPER_H
/* super_block (on-disk part) was taken from linux/include/linux/ext2_fs.h */
/* The first part of the structure mirrors the on-disk super block and must
* keep its field order and sizes; the part after s_reserved exists only in
* memory.
*/
EXTERN struct super_block {
u32_t s_inodes_count; /* Inodes count */
u32_t s_blocks_count; /* Blocks count */
u32_t s_r_blocks_count; /* Reserved blocks count */
u32_t s_free_blocks_count; /* Free blocks count */
u32_t s_free_inodes_count; /* Free inodes count */
u32_t s_first_data_block; /* First Data Block */
u32_t s_log_block_size; /* Block size */
u32_t s_log_frag_size; /* Fragment size */
u32_t s_blocks_per_group; /* # Blocks per group */
u32_t s_frags_per_group; /* # Fragments per group */
u32_t s_inodes_per_group; /* # Inodes per group */
u32_t s_mtime; /* Mount time */
u32_t s_wtime; /* Write time */
u16_t s_mnt_count; /* Mount count */
u16_t s_max_mnt_count; /* Maximal mount count */
u16_t s_magic; /* Magic signature */
u16_t s_state; /* File system state */
u16_t s_errors; /* Behaviour when detecting errors */
u16_t s_minor_rev_level; /* minor revision level */
u32_t s_lastcheck; /* time of last check */
u32_t s_checkinterval; /* max. time between checks */
u32_t s_creator_os; /* OS */
u32_t s_rev_level; /* Revision level */
u16_t s_def_resuid; /* Default uid for reserved blocks */
u16_t s_def_resgid; /* Default gid for reserved blocks */
/*
* These fields are for EXT2_DYNAMIC_REV superblocks only.
*
* Note: the difference between the compatible feature set and
* the incompatible feature set is that if there is a bit set
* in the incompatible feature set that the kernel doesn't
* know about, it should refuse to mount the filesystem.
*
* e2fsck's requirements are more strict; if it doesn't know
* about a feature in either the compatible or incompatible
* feature set, it must abort and not try to meddle with
* things it doesn't understand...
*/
u32_t s_first_ino; /* First non-reserved inode */
u16_t s_inode_size; /* size of inode structure */
u16_t s_block_group_nr; /* block group # of this superblock */
u32_t s_feature_compat; /* compatible feature set */
u32_t s_feature_incompat; /* incompatible feature set */
u32_t s_feature_ro_compat; /* readonly-compatible feature set */
u8_t s_uuid[16]; /* 128-bit uuid for volume */
char s_volume_name[16]; /* volume name */
char s_last_mounted[64]; /* directory where last mounted */
u32_t s_algorithm_usage_bitmap; /* For compression */
/*
* Performance hints. Directory preallocation should only
* happen if the EXT2_COMPAT_PREALLOC flag is on.
*/
u8_t s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
u8_t s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
u16_t s_padding1; /* last field that super_copy() converts */
/*
* Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
* super_copy() does not byte-swap anything from here on.
*/
u8_t s_journal_uuid[16]; /* uuid of journal superblock */
u32_t s_journal_inum; /* inode number of journal file */
u32_t s_journal_dev; /* device number of journal file */
u32_t s_last_orphan; /* start of list of inodes to delete */
u32_t s_hash_seed[4]; /* HTREE hash seed */
u8_t s_def_hash_version; /* Default hash version to use */
u8_t s_reserved_char_pad; /* presumably alignment padding */
u16_t s_reserved_word_pad; /* presumably alignment padding */
u32_t s_default_mount_opts; /* presumably default mount options - TODO confirm */
u32_t s_first_meta_bg; /* First metablock block group */
u32_t s_reserved[190]; /* Padding to the end of the block */
/* The following items are only used when the super_block is in memory. */
u32_t s_inodes_per_block; /* Number of inodes per block */
u32_t s_itb_per_group; /* Number of inode table blocks per group */
u32_t s_gdb_count; /* Number of group descriptor blocks */
u32_t s_desc_per_block; /* Number of group descriptors per block */
u32_t s_groups_count; /* Number of groups in the fs */
u8_t s_blocksize_bits; /* Used to calculate offsets
* (e.g. inode block),
* always s_log_block_size+10.
*/
struct group_desc *s_group_desc; /* Group descriptors read into RAM */
u16_t s_block_size; /* block size in bytes. */
u16_t s_sectors_in_block; /* s_block_size / 512 */
u32_t s_max_size; /* maximum file size on this device */
dev_t s_dev; /* whose super block is this? */
int s_rd_only; /* set to 1 if file sys mounted read only */
block_t s_bsearch; /* all data blocks below this block are in use*/
int s_igsearch; /* all groups below this one have no free inodes */
char s_is_root; /* NOTE(review): presumably set for the root fs - confirm */
u32_t s_dirs_counter; /* presumably the number of directories on the
* fs (cf. ext2_count_dirs) - TODO confirm
*/
} *superblock, *ondisk_superblock; /* NOTE(review): presumably the in-core
* copy and the image as read from disk
* - confirm against the users.
*/
/* Structure of a blocks group descriptor.
* On disk stored in little endian format.
*/
/* One instance describes one block group. gd_copy() converts every field
* down to used_dirs_count on big-endian CPUs; 'pad' and 'reserved' are only
* carried along by the verbatim copy on little-endian CPUs.
*/
struct group_desc
{
u32_t block_bitmap; /* Blocks bitmap block */
u32_t inode_bitmap; /* Inodes bitmap block */
u32_t inode_table; /* Inodes table block */
u16_t free_blocks_count; /* Free blocks count */
u16_t free_inodes_count; /* Free inodes count */
u16_t used_dirs_count; /* Directories count */
u16_t pad; /* alignment filler, not interpreted here */
u32_t reserved[3]; /* unused tail of the descriptor */
};
#define IMAP 0 /* operating on the inode bit map */
#define BMAP 1 /* operating on the block bit map */
#define IMAPD 2 /* operating on the inode bit map, inode is dir */
#endif /* EXT2_SUPER_H */

50
servers/ext2/table.c Normal file
View file

@ -0,0 +1,50 @@
/* This file contains the table used to map system call numbers onto the
* routines that perform them.
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#define _TABLE
#include "fs.h"
#include "inode.h"
#include "buf.h"
#include "super.h"
#include "drivers.h"
/* Dispatch table: the number in the comment next to each entry is its index
* in the array, i.e. the call number it serves. Slots 0, 1 and 29 have no
* implementation and point to no_sys(). Some handlers serve two adjacent
* call numbers (11/12, 13/14 and 19/20); presumably they tell the two apart
* by looking at the request number - TODO confirm in the handlers.
*/
PUBLIC _PROTOTYPE (int (*fs_call_vec[]), (void) ) = {
no_sys, /* 0 not used */
no_sys, /* 1 */ /* Was: fs_getnode */
fs_putnode, /* 2 */
fs_slink, /* 3 */
fs_ftrunc, /* 4 */
fs_chown, /* 5 */
fs_chmod, /* 6 */
fs_inhibread, /* 7 */
fs_stat, /* 8 */
fs_utime, /* 9 */
fs_fstatfs, /* 10 */
fs_breadwrite, /* 11 */
fs_breadwrite, /* 12 */
fs_unlink, /* 13 */
fs_unlink, /* 14 */
fs_unmount, /* 15 */
fs_sync, /* 16 */
fs_new_driver, /* 17 */
fs_flush, /* 18 */
fs_readwrite, /* 19 */
fs_readwrite, /* 20 */
fs_mknod, /* 21 */
fs_mkdir, /* 22 */
fs_create, /* 23 */
fs_link, /* 24 */
fs_rename, /* 25 */
fs_lookup, /* 26 */
fs_mountpoint, /* 27 */
fs_readsuper, /* 28 */
no_sys, /* 29 */ /* Was: fs_newnode */
fs_rdlink, /* 30 */
fs_getdents, /* 31 */
fs_statvfs, /* 32 */
};

36
servers/ext2/time.c Normal file
View file

@ -0,0 +1,36 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <minix/callnr.h>
#include <minix/com.h>
#include "inode.h"
#include <minix/vfsif.h>
/*===========================================================================*
* fs_utime *
*===========================================================================*/
PUBLIC int fs_utime()
{
/* Set the access and modification times of the inode named in the request
 * message (REQ_INODE_NR) to REQ_ACTIME and REQ_MODTIME.
 * Returns EINVAL if the inode cannot be obtained, EROFS if read_only()
 * rejects the inode, OK otherwise. No ownership check is made here.
 */
  struct inode *ip;
  int status;

  /* Temporarily open the file. */
  ip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
  if (ip == NULL)
    return(EINVAL);

  if (read_only(ip) != OK) {
    status = EROFS;	/* not even su can touch if R/O */
  } else {
    ip->i_atime = fs_m_in.REQ_ACTIME;
    ip->i_mtime = fs_m_in.REQ_MODTIME;
    ip->i_update = CTIME;	/* discard any stale ATIME and MTIME flags */
    ip->i_dirt = DIRTY;
    status = OK;
  }

  put_inode(ip);
  return(status);
}

116
servers/ext2/type.h Normal file
View file

@ -0,0 +1,116 @@
#ifndef EXT2_TYPE_H
#define EXT2_TYPE_H
/* On the disk all attributes are stored in little endian format.
* Inode structure was taken from linux/include/linux/ext2_fs.h.
*/
/* On-disk inode. Field order and sizes follow the disk layout and must not
* be changed.
*/
typedef struct {
u16_t i_mode; /* File mode */
u16_t i_uid; /* Low 16 bits of Owner Uid */
u32_t i_size; /* Size in bytes */
u32_t i_atime; /* Access time */
u32_t i_ctime; /* Creation time */
u32_t i_mtime; /* Modification time */
u32_t i_dtime; /* Deletion Time */
u16_t i_gid; /* Low 16 bits of Group Id */
u16_t i_links_count; /* Links count */
u32_t i_blocks; /* Blocks count, in 512-byte units */
u32_t i_flags; /* File flags */
union {
struct {
u32_t l_i_reserved1;
} linux1;
struct {
u32_t h_i_translator;
} hurd1;
struct {
u32_t m_i_reserved1;
} masix1;
} osd1; /* OS dependent 1 */
u32_t i_block[EXT2_N_BLOCKS];/* Pointers to blocks: the direct blocks
* first, followed by the single, double
* and triple indirect block.
*/
u32_t i_generation; /* File version (for NFS) */
u32_t i_file_acl; /* File ACL */
u32_t i_dir_acl; /* Directory ACL */
u32_t i_faddr; /* Fragment address */
union {
struct {
u8_t l_i_frag; /* Fragment number */
u8_t l_i_fsize; /* Fragment size */
u16_t i_pad1;
u16_t l_i_uid_high; /* these 2 fields */
u16_t l_i_gid_high; /* were reserved2[0] */
u32_t l_i_reserved2;
} linux2;
struct {
u8_t h_i_frag; /* Fragment number */
u8_t h_i_fsize; /* Fragment size */
u16_t h_i_mode_high;
u16_t h_i_uid_high;
u16_t h_i_gid_high;
u32_t h_i_author;
} hurd2;
struct {
u8_t m_i_frag; /* Fragment number */
u8_t m_i_fsize; /* Fragment size */
u16_t m_pad1;
u32_t m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2 */
} d_inode;
/* Part of on disk directory (entry description).
* It holds all fixed-size fields of an entry. The name has no fixed size,
* so only a one-byte placeholder (d_name) is declared for it.
* In revision 0 name_len is u16_t (here is structure of rev >= 0.5,
* where name_len was truncated with the upper 8 bit to add file_type).
* MIN_DIR_ENTRY_SIZE depends on this structure.
*/
struct ext2_disk_dir_desc {
u32_t d_ino; /* presumably the inode number of the entry - TODO confirm */
u16_t d_rec_len; /* distance in bytes from this entry to the next one */
u8_t d_name_len; /* presumably the length of the name in d_name */
u8_t d_file_type;
char d_name[1]; /* first byte of the name */
};
/* Current position in block: byte offset of 'cur_desc' from 'base'.
 * Every macro argument is parenthesized so that expressions such as
 * 'dp + 1' can be passed safely. 'cur_desc' is evaluated more than once by
 * the NEXT_* macros, so it must not have side effects.
 */
#define CUR_DISC_DIR_POS(cur_desc, base) ((char*)(cur_desc) - (char*)(base))
/* Return pointer to the next dentry (d_rec_len bytes further on) */
#define NEXT_DISC_DIR_DESC(cur_desc) ((struct ext2_disk_dir_desc*)\
	((char*)(cur_desc) + (cur_desc)->d_rec_len))
/* Return next dentry's position in block */
#define NEXT_DISC_DIR_POS(cur_desc, base) ((cur_desc)->d_rec_len +\
	CUR_DISC_DIR_POS(cur_desc, base))
/* Header of one block buffer. The block contents live in the area that 'bp'
* points to; the remaining fields are bookkeeping for the buffer itself.
*/
struct buf {
/* Data portion of the buffer. Other files reach into it through names
* such as b_data and b_ind (presumably accessor macros - TODO confirm).
*/
union fsdata_u *bp;
/* Header portion of the buffer. */
struct buf *b_next; /* used to link all free bufs in a chain */
struct buf *b_prev; /* used to link all free bufs the other way */
struct buf *b_hash; /* used to link bufs on hash chains */
block_t b_blocknr; /* block number of its (minor) device */
dev_t b_dev; /* major | minor device where block resides */
char b_dirt; /* CLEAN or DIRTY */
char b_count; /* number of users of this buffer */
unsigned int b_bytes; /* Number of bytes allocated in bp */
};
/* Structure with options affecting global behavior. */
struct opt {
int use_orlov; /* Bool: Use Orlov allocator */
int mfsalloc; /* Bool: use mfslike allocator */
/* In ext2 there are reserved blocks, which can be used by super user only or
* user specified by resuid/resgid. Right now we can't check what user
* requested operation (VFS limitation), so it's a small workaround.
*/
int use_reserved_blocks; /* Bool: small workaround */
unsigned int block_with_super;/* Int: where to read super block,
* uses 1k units. */
int use_prealloc; /* Bool: use preallocation */
};
#endif /* EXT2_TYPE_H */

255
servers/ext2/utility.c Normal file
View file

@ -0,0 +1,255 @@
/* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include "buf.h"
#include "inode.h"
#include "super.h"
/*===========================================================================*
* no_sys *
*===========================================================================*/
PUBLIC int no_sys()
{
/* Handler for call numbers that have no implementation: log the offending
 * request number (req_nr) and fail with EINVAL.
 */
  printf("no_sys: invalid call %d\n", req_nr);

  return EINVAL;
}
/*===========================================================================*
* conv2 *
*===========================================================================*/
PUBLIC unsigned conv2(norm, w)
int norm;			/* TRUE if no swap, FALSE for byte swap */
int w;				/* promotion of 16-bit word to be swapped */
{
/* Return the 16-bit word 'w', with its two bytes exchanged unless 'norm' is
 * set. (Assumes BYTE is a mask for the low 8 bits - TODO confirm.)
 */
  int low_byte, high_byte;

  if (!norm) {
    low_byte = w & BYTE;
    high_byte = (w >> 8) & BYTE;
    return (low_byte << 8) | high_byte;
  }

  /* Byte order is fine already; just drop anything above bit 15. */
  return (unsigned) w & 0xFFFF;
}
/*===========================================================================*
* conv4 *
*===========================================================================*/
PUBLIC long conv4(norm, x)
int norm;			/* TRUE if no swap, FALSE for byte swap */
long x;				/* 32-bit long to be byte swapped */
{
/* Possibly swap the byte order of a 32-bit value.
 * If 'norm' is set, 'x' is returned unchanged. Otherwise the low 32 bits of
 * 'x' are returned with their four bytes reversed: each 16-bit half is
 * byte-swapped with conv2() and the halves then trade places.
 *
 * All bit manipulation is done on unsigned long, so no bit is ever shifted
 * into the sign bit of a signed type.
 */
  unsigned long bits, swapped;
  unsigned lo, hi;

  if (norm) return(x);		/* byte order was already ok */

  bits = (unsigned long) x;
  lo = conv2(FALSE, (int) (bits & 0xFFFF));	/* low-order half, byte swapped */
  hi = conv2(FALSE, (int) ((bits >> 16) & 0xFFFF)); /* high-order half, swapped */

  swapped = ((unsigned long) lo << 16) | hi;	/* halves trade places */
  return((long) swapped);
}
/*===========================================================================*
* clock_time *
*===========================================================================*/
PUBLIC time_t clock_time()
{
/* Return the current time in seconds since 1.1.1970, computed as the boot
 * time plus the uptime divided by sys_hz(). MINIX is an astrophysically
 * naive system that assumes the earth rotates at a constant rate and that
 * such things as leap seconds do not exist.
 * Panics if getuptime2() does not return OK.
 */
  clock_t uptime;
  time_t boottime;
  int r;

  r = getuptime2(&uptime, &boottime);
  if (r != OK)
    panic("clock_time: getuptme2 failed: %d", r);

  return (time_t) (boottime + (uptime/sys_hz()));
}
/*===========================================================================*
* min *
*===========================================================================*/
PUBLIC int min(unsigned int l, unsigned int r)
{
/* Return the smaller of the two unsigned arguments, converted to int. */
  return (r >= l) ? l : r;
}
/*===========================================================================*
* mfs_nul_f *
*===========================================================================*/
PUBLIC void mfs_nul_f(char *file, int line, char *str, unsigned int len,
	unsigned int maxlen)
{
/* Diagnostic helper: print a complaint when the string 'str' of length 'len'
 * (which must be shorter than 'maxlen' for the check to apply) does not end
 * in a null byte. 'file' and 'line' identify the call site in the message.
 * Nothing is checked, and nothing is printed, for an empty string: with
 * len == 0 there is no last byte to look at.
 */
  if (len == 0 || len >= maxlen)
    return;

  if (str[len-1] != '\0') {
    printf("ext2 %s:%d string (length %u, maxlen %u) not null-terminated\n",
	file, line, len, maxlen);
  }
}
/* Sanity-check helper for sanitycheck(): if condition 'c' does not hold,
 * print it together with 'file' and 'line' (both must be in scope at the
 * point of use) and panic. The do/while(0) wrapper makes the macro behave
 * like a single statement, so "if (x) MYASSERT(y); else ..." works.
 */
#define MYASSERT(c) do { if(!(c)) { \
	printf("ext2:%s:%d: sanity check: %s failed\n", file, line, #c); \
	panic("sanity check " #c " failed: %d", __LINE__); } } while (0)
/*===========================================================================*
* sanitycheck *
*===========================================================================*/
PUBLIC void sanitycheck(char *file, int line)
{
/* Verify a few global invariants and panic (through MYASSERT) if one of them
 * is violated. 'file' and 'line' name the call site and are picked up by
 * MYASSERT. As long as no device is attached to the super block
 * (s_dev == NO_DEV) only the default block size is checked; otherwise the
 * super block must agree with fs_dev and fs_block_size.
 */
  MYASSERT(SELF_E > 0);

  if(superblock->s_dev == NO_DEV) {
    MYASSERT(_MIN_BLOCK_SIZE == fs_block_size);
    return;
  }

  MYASSERT(superblock->s_dev == fs_dev);
  MYASSERT(superblock->s_block_size == fs_block_size);
}
/*===========================================================================*
* ansi_strcmp *
*===========================================================================*/
PUBLIC int ansi_strcmp(register const char* ansi_s, register const char *s2,
	register size_t ansi_s_length)
{
/* Compare non null-terminated string ansi_s (length=ansi_s_length)
 * with C-string s2.
 * It returns 0 if strings are equal, otherwise -1 is returned.
 * The strings are equal only if s2 is exactly ansi_s_length characters long
 * and all of them match; in particular an empty ansi_s equals only the
 * empty C-string.
 */
  size_t i;

  for (i = 0; i < ansi_s_length; i++) {
    /* s2 ended early, or the characters differ. */
    if (s2[i] == '\0' || ansi_s[i] != s2[i])
      return -1;
  }

  /* All of ansi_s matched; s2 must end right here as well. */
  return (s2[ansi_s_length] == '\0') ? 0 : -1;
}
/*===========================================================================*
* setbit *
*===========================================================================*/
PUBLIC bit_t setbit(bitchunk_t *bitmap, bit_t max_bits, unsigned int word)
{
/* Find free bit in bitmap and set. Return number of the bit,
 * if failed return -1.
 * The search starts at chunk index 'word' and looks at whole chunks only;
 * the returned number counts from the start of the bitmap. Bits numbered
 * 'max_bits' or higher are never handed out.
 */
  bitchunk_t *wptr, *wlim;
  bit_t b = -1;

  /* TODO: do we need to add 1? I saw a situation, when it was
   * required, and since we check bit number with max_bits it
   * should be safe.
   */
  wlim = &bitmap[FS_BITMAP_CHUNKS(max_bits >> 3)];

  /* Iterate over the words in block. */
  for (wptr = &bitmap[word]; wptr < wlim; wptr++) {
    bit_t i;
    bitchunk_t k;

    /* Does this word contain a free bit? */
    if (*wptr == (bitchunk_t) ~0)
      continue;

    /* Find the free bit. The mask is built from a bitchunk_t so that the
     * top bit of the chunk can be tested without shifting a signed int
     * into its sign bit.
     */
    k = *wptr;
    for (i = 0; (k & ((bitchunk_t) 1 << i)) != 0; ++i) {}

    /* Bit number from the start of the bit map. */
    b = (wptr - &bitmap[0]) * FS_BITCHUNK_BITS + i;

    /* Don't allocate bits beyond the end of the map. */
    if (b >= max_bits) {
      b = -1;
      continue;
    }

    /* Allocate bit number. */
    k |= (bitchunk_t) 1 << i;
    *wptr = k;
    break;
  }

  return b;
}
/*===========================================================================*
* setbyte *
*===========================================================================*/
PUBLIC bit_t setbyte(bitchunk_t *bitmap, bit_t max_bits, unsigned int word)
{
/* Find free byte in bitmap and set it. Return number of the starting bit,
 * if failed return -1.
 * A byte is free when all of its bits are clear; allocating it sets all of
 * them. Only bytes that lie completely below 'max_bits' are considered.
 * The search always starts at the beginning of the bitmap; 'word' is not
 * used and is kept for interface compatibility.
 */
  unsigned char *wptr, *wlim;
  bit_t b = -1;

  wptr = (unsigned char*) &bitmap[0];
  /* TODO: do we need to add 1? I saw a situation, when it was
   * required, and since we check bit number with max_bits it
   * should be safe.
   */
  wlim = &wptr[(max_bits >> 3)];

  /* Iterate over the bytes in block. */
  for ( ; wptr < wlim; wptr++) {
    /* Is it a free byte? */
    if (*wptr != 0)
      continue;

    /* Bit number from the start of the bit map. */
    b = (wptr - (unsigned char*) &bitmap[0]) * CHAR_BIT;

    /* Don't allocate bits beyond the end of the map. The byte covers bits
     * b .. b + CHAR_BIT - 1, so it still fits when b + CHAR_BIT equals
     * max_bits (same limit as the 'b >= max_bits' test in setbit()).
     */
    if (b + CHAR_BIT > max_bits) {
      b = -1;
      continue;
    }

    /* Allocate byte number. */
    *wptr = (unsigned char) ~0;
    break;
  }

  return b;
}
/*===========================================================================*
* unsetbit *
*===========================================================================*/
PUBLIC int unsetbit(bitchunk_t *bitmap, bit_t bit)
{
/* Unset specified bit. If requested bit is already free return -1,
 * otherwise return 0.
 * 'bit' counts from the start of 'bitmap'; no upper bound is checked here.
 */
  unsigned int word;		/* index of the chunk that holds the bit */
  bitchunk_t k, mask;

  word = bit / FS_BITCHUNK_BITS;
  bit = bit % FS_BITCHUNK_BITS;	/* index in word */

  /* Build the mask from a bitchunk_t: shifting the int constant 1 into its
   * sign bit would be undefined for the top bit of a chunk.
   */
  mask = (bitchunk_t) 1 << bit;

  k = bitmap[word];
  if (!(k & mask))
    return -1;

  k &= ~mask;
  bitmap[word] = k;
  return 0;
}

375
servers/ext2/write.c Normal file
View file

@ -0,0 +1,375 @@
/* This file is the counterpart of "read.c". It contains the code for writing
* insofar as this is not contained in fs_readwrite().
*
* The entry points into this file are
* write_map: write a new block into an inode
* new_block: acquire a new block
* zero_block: overwrite a block with zeroes
*
* Created (MFS based):
* February 2010 (Evgeniy Ivanov)
*/
#include "fs.h"
#include <string.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
FORWARD _PROTOTYPE( void wr_indir, (struct buf *bp, int index, block_t block) );
FORWARD _PROTOTYPE( int empty_indir, (struct buf *, struct super_block *) );
/*===========================================================================*
* write_map *
*===========================================================================*/
PUBLIC int write_map(rip, position, new_block, op)
struct inode *rip;		/* pointer to inode to be changed */
off_t position;			/* file address to be mapped */
block_t new_block;		/* block # to be inserted */
int op;				/* special actions */
{
/* Write a new block into an inode.
 *
 * Without WMAP_FREE in 'op', 'new_block' is recorded as the block that backs
 * file offset 'position'. Single, double and triple indirect blocks that are
 * missing on the way are allocated and zeroed first.
 *
 * If op includes WMAP_FREE, free the block corresponding to that position
 * in the inode ('new_block' is ignored then). Also free the indirect block
 * if that was the last entry in the indirect block.
 * Also free the double/triple indirect block if that was the last entry in
 * the double/triple indirect block. Freeing a position that has no block
 * (a hole) changes nothing.
 *
 * It's the only function which should take care about rip->i_blocks counter.
 * The counter moves in steps of s_sectors_in_block for every data or
 * indirect block that is added or released.
 *
 * Returns OK on success, EFBIG if 'position' lies beyond what a triple
 * indirect block can address, and ENOSPC if an indirect block could not be
 * allocated.
 */
  int index1 = 0, index2 = 0, index3 = 0; /* indexes in single..triple indirect blocks */
  long excess, block_pos;
  char new_ind = 0, new_dbl = 0, new_triple = 0;
  int single = 0, triple = 0;
  register block_t old_block, b1, b2 = NO_BLOCK, b3 = NO_BLOCK;
  struct buf *bp = NULL,
	*bp_dindir = NULL,
	*bp_tindir = NULL;
  /* The addressing limits depend on the block size only. They are computed
   * on the first call and reused afterwards.
   * NOTE(review): this assumes every inode passed in later uses the same
   * block size as the first one - confirm.
   */
  static char first_time = TRUE;
  static long addr_in_block;
  static long addr_in_block2;
  static long doub_ind_s;
  static long triple_ind_s;
  static long out_range_s;

  if (first_time) {
    addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
    addr_in_block2 = addr_in_block * addr_in_block;
    doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
    triple_ind_s = doub_ind_s + addr_in_block2;
    out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
    first_time = FALSE;
  }

  block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
  rip->i_dirt = DIRTY;		/* inode will be changed */

  /* Is 'position' to be found in the inode itself? */
  if (block_pos < EXT2_NDIR_BLOCKS) {
    if (op & WMAP_FREE) {
      /* A hole stays a hole: nothing to release, nothing to count, and
       * 'new_block' must not be stored.
       */
      if (rip->i_block[block_pos] != NO_BLOCK) {
        free_block(rip->i_sp, rip->i_block[block_pos]);
        rip->i_block[block_pos] = NO_BLOCK;
        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
      }
    } else {
      rip->i_block[block_pos] = new_block;
      rip->i_blocks += rip->i_sp->s_sectors_in_block;
    }
    return(OK);
  }

  /* It is not in the inode, so it must be single, double or triple indirect */
  if (block_pos < doub_ind_s) {
    b1 = rip->i_block[EXT2_NDIR_BLOCKS]; /* addr of single indirect block */
    index1 = block_pos - EXT2_NDIR_BLOCKS;
    single = TRUE;
  } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
    return(EFBIG);
  } else {
    /* double or triple indirect block. At first if it's triple,
     * find double indirect block.
     */
    excess = block_pos - doub_ind_s;
    b2 = rip->i_block[EXT2_DIND_BLOCK];
    if (block_pos >= triple_ind_s) {
      b3 = rip->i_block[EXT2_TIND_BLOCK];
      if (b3 == NO_BLOCK && !(op & WMAP_FREE)) {
        /* Create triple indirect block. */
        if ( (b3 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
          ext2_debug("failed to allocate tblock near %d\n", rip->i_block[0]);
          return(ENOSPC);
        }
        rip->i_block[EXT2_TIND_BLOCK] = b3;
        rip->i_blocks += rip->i_sp->s_sectors_in_block;
        new_triple = TRUE;
      }
      /* 'b3' is block number for triple indirect block, either old
       * or newly created.
       * If there wasn't one and WMAP_FREE is set, 'b3' is NO_BLOCK.
       */
      if (b3 == NO_BLOCK) {
        /* WMAP_FREE and no triple indirect block - then no
         * double and single indirect blocks either.
         */
        b1 = b2 = NO_BLOCK;
      } else {
        bp_tindir = get_block(rip->i_dev, b3, (new_triple ? NO_READ : NORMAL));
        if (new_triple) {
          zero_block(bp_tindir);
          bp_tindir->b_dirt = DIRTY;
        }
        excess = block_pos - triple_ind_s;
        index3 = excess / addr_in_block2;
        b2 = rd_indir(bp_tindir, index3);
        excess = excess % addr_in_block2;
      }
      triple = TRUE;
    }

    if (b2 == NO_BLOCK && !(op & WMAP_FREE)) {
      /* Create the double indirect block. */
      if ( (b2 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
        /* Release triple ind blk. */
        put_block(bp_tindir, INDIRECT_BLOCK);
        ext2_debug("failed to allocate dblock near %d\n", rip->i_block[0]);
        return(ENOSPC);
      }
      if (triple) {
        wr_indir(bp_tindir, index3, b2); /* update triple indir */
        bp_tindir->b_dirt = DIRTY;
      } else {
        rip->i_block[EXT2_DIND_BLOCK] = b2;
      }
      rip->i_blocks += rip->i_sp->s_sectors_in_block;
      new_dbl = TRUE;		/* set flag for later */
    }

    /* 'b2' is block number for double indirect block, either old
     * or newly created.
     * If there wasn't one and WMAP_FREE is set, 'b2' is NO_BLOCK.
     */
    if (b2 == NO_BLOCK) {
      /* WMAP_FREE and no double indirect block - then no
       * single indirect block either.
       */
      b1 = NO_BLOCK;
    } else {
      bp_dindir = get_block(rip->i_dev, b2, (new_dbl ? NO_READ : NORMAL));
      if (new_dbl) {
        zero_block(bp_dindir);
        bp_dindir->b_dirt = DIRTY;
      }
      index2 = excess / addr_in_block;
      b1 = rd_indir(bp_dindir, index2);
      index1 = excess % addr_in_block;
    }
    single = FALSE;
  }

  /* b1 is now single indirect block or NO_BLOCK; 'index' is index.
   * We have to create the indirect block if it's NO_BLOCK. Unless
   * we're freeing (WMAP_FREE).
   */
  if (b1 == NO_BLOCK && !(op & WMAP_FREE)) {
    if ( (b1 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
      /* Release dbl and triple indirect blks. */
      put_block(bp_dindir, INDIRECT_BLOCK);
      put_block(bp_tindir, INDIRECT_BLOCK);
      ext2_debug("failed to allocate dblock near %d\n", rip->i_block[0]);
      return(ENOSPC);
    }
    if (single) {
      rip->i_block[EXT2_NDIR_BLOCKS] = b1; /* update inode single indirect */
    } else {
      wr_indir(bp_dindir, index2, b1);	/* update dbl indir */
      bp_dindir->b_dirt = DIRTY;
    }
    rip->i_blocks += rip->i_sp->s_sectors_in_block;
    new_ind = TRUE;
  }

  /* b1 is indirect block's number (unless it's NO_BLOCK when we're
   * freeing).
   */
  if (b1 != NO_BLOCK) {
    bp = get_block(rip->i_dev, b1, (new_ind ? NO_READ : NORMAL) );
    if (new_ind)
      zero_block(bp);
    if (op & WMAP_FREE) {
      if ((old_block = rd_indir(bp, index1)) != NO_BLOCK) {
        free_block(rip->i_sp, old_block);
        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
        wr_indir(bp, index1, NO_BLOCK);
      }

      /* Last reference in the indirect block gone? Then
       * free the indirect block.
       */
      if (empty_indir(bp, rip->i_sp)) {
        free_block(rip->i_sp, b1);
        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
        b1 = NO_BLOCK;
        /* Update the reference to the indirect block to
         * NO_BLOCK - in the double indirect block if there
         * is one, otherwise in the inode directly.
         */
        if (single) {
          rip->i_block[EXT2_NDIR_BLOCKS] = b1;
        } else {
          wr_indir(bp_dindir, index2, b1);
          bp_dindir->b_dirt = DIRTY;
        }
      }
    } else {
      wr_indir(bp, index1, new_block);
      rip->i_blocks += rip->i_sp->s_sectors_in_block;
    }
    /* b1 equals NO_BLOCK only when we are freeing up the indirect block. */
    bp->b_dirt = (b1 == NO_BLOCK) ? CLEAN : DIRTY;
    put_block(bp, INDIRECT_BLOCK);
  }

  /* If the single indirect block isn't there (or was just freed),
   * see if we have to keep the double indirect block, if any.
   * If we don't have to keep it, don't bother writing it out.
   */
  if (b1 == NO_BLOCK && !single && b2 != NO_BLOCK &&
      empty_indir(bp_dindir, rip->i_sp)) {
    bp_dindir->b_dirt = CLEAN;
    free_block(rip->i_sp, b2);
    rip->i_blocks -= rip->i_sp->s_sectors_in_block;
    b2 = NO_BLOCK;
    if (triple) {
      wr_indir(bp_tindir, index3, b2);	/* update triple indir */
      bp_tindir->b_dirt = DIRTY;
    } else {
      rip->i_block[EXT2_DIND_BLOCK] = b2;
    }
  }

  /* If the double indirect block isn't there (or was just freed),
   * see if we have to keep the triple indirect block, if any.
   * If we don't have to keep it, don't bother writing it out.
   */
  if (b2 == NO_BLOCK && triple && b3 != NO_BLOCK &&
      empty_indir(bp_tindir, rip->i_sp)) {
    bp_tindir->b_dirt = CLEAN;
    free_block(rip->i_sp, b3);
    rip->i_blocks -= rip->i_sp->s_sectors_in_block;
    rip->i_block[EXT2_TIND_BLOCK] = NO_BLOCK;
  }

  /* Either pointer may still be NULL here. */
  put_block(bp_dindir, INDIRECT_BLOCK);	/* release double indirect blk */
  put_block(bp_tindir, INDIRECT_BLOCK);	/* release triple indirect blk */

  return(OK);
}
/*===========================================================================*
* wr_indir *
*===========================================================================*/
PRIVATE void wr_indir(bp, index, block)
struct buf *bp;			/* pointer to indirect block */
int index;			/* index into *bp */
block_t block;			/* block to write */
{
/* Store block number 'block' in entry 'index' of the indirect block held in
 * buffer 'bp'. The number is passed through conv4() on its way into the
 * buffer. Panics when 'bp' is NULL. Marking the buffer dirty is left to the
 * caller.
 */
  if (!bp)
    panic("wr_indir() on NULL");

  bp->b_ind[index] = conv4(le_CPU, block);
}
/*===========================================================================*
* empty_indir *
*===========================================================================*/
PRIVATE int empty_indir(bp, sb)
struct buf *bp;			/* pointer to indirect block */
struct super_block *sb;		/* superblock of device block resides on */
{
/* Tell whether the indirect block in 'bp' is unused: return 1 if every one
 * of its s_block_size/4 entries is NO_BLOCK, 0 as soon as one is not.
 */
  long nr_entries = sb->s_block_size/4;	/* 4 bytes per addr */
  int slot = 0;

  while (slot < nr_entries) {
    if (bp->b_ind[slot] != NO_BLOCK)
      return(0);
    slot++;
  }
  return(1);
}
/*===========================================================================*
* new_block *
*===========================================================================*/
PUBLIC struct buf *new_block(rip, position)
register struct inode *rip;	/* pointer to inode */
off_t position;			/* file pointer */
{
/* Return a zeroed buffer for the block that backs 'position' in the file.
 * If read_map() finds no block there, one is allocated and entered into the
 * inode with write_map(). On failure NULL is returned and err_code is set
 * (ENOSPC, or the error reported by write_map()).
 * Note that the buffer is fetched with NO_READ and zeroed even if a block
 * was already mapped at 'position'.
 */
  struct buf *bp;
  block_t b;
  int r;

  b = read_map(rip, position);

  /* Is another block available? */
  if (b == NO_BLOCK) {
    block_t goal = NO_BLOCK;

    /* Check if this position follows last allocated block. */
    if (rip->i_last_pos_bl_alloc != 0) {
      off_t position_diff = position - rip->i_last_pos_bl_alloc;

      if (rip->i_bsearch == 0) {
        /* Should never happen, but not critical */
        ext2_debug("warning, i_bsearch is 0, while\
i_last_pos_bl_alloc is not!");
      }

      if (position_diff > rip->i_sp->s_block_size) {
        /* Non-sequential write operation, disable preallocation
         * for this inode.
         */
        rip->i_preallocation = 0;
        discard_preallocated_blocks(rip);
      } else {
        goal = rip->i_bsearch + 1;
      }
    }

    b = alloc_block(rip, goal);
    if (b == NO_BLOCK) {
      err_code = ENOSPC;
      return(NULL);
    }

    r = write_map(rip, position, b, 0);
    if (r != OK) {
      free_block(rip->i_sp, b);
      err_code = r;
      ext2_debug("write_map failed\n");
      return(NULL);
    }

    /* rip->i_last_pos_bl_alloc points to the block position, and zero
     * indicates first usage, thus position 0 is remembered as 1.
     */
    rip->i_last_pos_bl_alloc = position;
    if (position == 0)
      rip->i_last_pos_bl_alloc++;
  }

  bp = get_block(rip->i_dev, b, NO_READ);
  zero_block(bp);
  return(bp);
}
/*===========================================================================*
* zero_block *
*===========================================================================*/
PUBLIC void zero_block(bp)
register struct buf *bp;	/* pointer to buffer to zero */
{
/* Fill the data area of buffer 'bp' (b_bytes bytes) with zeroes and mark the
 * buffer DIRTY. The buffer must have a non-empty data area attached.
 */
  ASSERT(bp->b_bytes > 0);
  ASSERT(bp->bp);

  (void) memset(bp->b_data, 0, (size_t) bp->b_bytes);
  bp->b_dirt = DIRTY;
}