clarify some FS comments

This commit is contained in:
Robert Morris 2011-10-11 06:41:37 -04:00
parent d73dd097a5
commit a5fbfe418a
6 changed files with 69 additions and 46 deletions

16
bio.c
View file

@ -7,7 +7,7 @@
// //
// Interface: // Interface:
// * To get a buffer for a particular disk block, call bread. // * To get a buffer for a particular disk block, call bread.
// * After changing buffer data, call bwrite to flush it to disk. // * After changing buffer data, call bwrite to write it to disk.
// * When done with the buffer, call brelse. // * When done with the buffer, call brelse.
// * Do not use the buffer after calling brelse. // * Do not use the buffer after calling brelse.
// * Only one process at a time can use a buffer, // * Only one process at a time can use a buffer,
@ -16,8 +16,7 @@
// The implementation uses three state flags internally: // The implementation uses three state flags internally:
// * B_BUSY: the block has been returned from bread // * B_BUSY: the block has been returned from bread
// and has not been passed back to brelse. // and has not been passed back to brelse.
// * B_VALID: the buffer data has been initialized // * B_VALID: the buffer data has been read from the disk.
// with the associated disk block contents.
// * B_DIRTY: the buffer data has been modified // * B_DIRTY: the buffer data has been modified
// and needs to be written to disk. // and needs to be written to disk.
@ -58,7 +57,7 @@ binit(void)
// Look through buffer cache for sector on device dev. // Look through buffer cache for sector on device dev.
// If not found, allocate fresh block. // If not found, allocate fresh block.
// In either case, return locked buffer. // In either case, return B_BUSY buffer.
static struct buf* static struct buf*
bget(uint dev, uint sector) bget(uint dev, uint sector)
{ {
@ -67,7 +66,7 @@ bget(uint dev, uint sector)
acquire(&bcache.lock); acquire(&bcache.lock);
loop: loop:
// Try for cached block. // Is the sector already cached?
for(b = bcache.head.next; b != &bcache.head; b = b->next){ for(b = bcache.head.next; b != &bcache.head; b = b->next){
if(b->dev == dev && b->sector == sector){ if(b->dev == dev && b->sector == sector){
if(!(b->flags & B_BUSY)){ if(!(b->flags & B_BUSY)){
@ -80,7 +79,7 @@ bget(uint dev, uint sector)
} }
} }
// Allocate fresh block. // Not cached; recycle some existing buffer.
for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
if((b->flags & B_BUSY) == 0){ if((b->flags & B_BUSY) == 0){
b->dev = dev; b->dev = dev;
@ -105,7 +104,7 @@ bread(uint dev, uint sector)
return b; return b;
} }
// Write b's contents to disk. Must be locked. // Write b's contents to disk. Must be B_BUSY.
void void
bwrite(struct buf *b) bwrite(struct buf *b)
{ {
@ -115,7 +114,8 @@ bwrite(struct buf *b)
iderw(b); iderw(b);
} }
// Release the buffer b. // Release a B_BUSY buffer.
// Move to the head of the MRU list.
void void
brelse(struct buf *b) brelse(struct buf *b)
{ {

8
file.h
View file

@ -9,8 +9,7 @@ struct file {
}; };
// in-core file system types // in-memory copy of an inode
struct inode { struct inode {
uint dev; // Device number uint dev; // Device number
uint inum; // Inode number uint inum; // Inode number
@ -24,12 +23,11 @@ struct inode {
uint size; uint size;
uint addrs[NDIRECT+1]; uint addrs[NDIRECT+1];
}; };
#define I_BUSY 0x1 #define I_BUSY 0x1
#define I_VALID 0x2 #define I_VALID 0x2
// device implementations // table mapping major device number to
// device functions
struct devsw { struct devsw {
int (*read)(struct inode*, char*, int); int (*read)(struct inode*, char*, int);
int (*write)(struct inode*, char*, int); int (*write)(struct inode*, char*, int);

54
fs.c
View file

@ -1,11 +1,10 @@
// File system implementation. Four layers: // File system implementation. Five layers:
// + Blocks: allocator for raw disk blocks. // + Blocks: allocator for raw disk blocks.
// + Log: crash recovery for multi-step updates.
// + Files: inode allocator, reading, writing, metadata. // + Files: inode allocator, reading, writing, metadata.
// + Directories: inode with special contents (list of other inodes!) // + Directories: inode with special contents (list of other inodes!)
// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
// //
// Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
//
// This file contains the low-level file system manipulation // This file contains the low-level file system manipulation
// routines. The (higher-level) system call implementations // routines. The (higher-level) system call implementations
// are in sysfile.c. // are in sysfile.c.
@ -61,10 +60,10 @@ balloc(uint dev)
readsb(dev, &sb); readsb(dev, &sb);
for(b = 0; b < sb.size; b += BPB){ for(b = 0; b < sb.size; b += BPB){
bp = bread(dev, BBLOCK(b, sb.ninodes)); bp = bread(dev, BBLOCK(b, sb.ninodes));
for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){ for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
m = 1 << (bi % 8); m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0){ // Is block free? if((bp->data[bi/8] & m) == 0){ // Is block free?
bp->data[bi/8] |= m; // Mark block in use on disk. bp->data[bi/8] |= m; // Mark block in use.
log_write(bp); log_write(bp);
brelse(bp); brelse(bp);
bzero(dev, b + bi); bzero(dev, b + bi);
@ -90,22 +89,27 @@ bfree(int dev, uint b)
m = 1 << (bi % 8); m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0) if((bp->data[bi/8] & m) == 0)
panic("freeing free block"); panic("freeing free block");
bp->data[bi/8] &= ~m; // Mark block free on disk. bp->data[bi/8] &= ~m;
log_write(bp); log_write(bp);
brelse(bp); brelse(bp);
} }
// Inodes. // Inodes.
// //
// An inode is a single, unnamed file in the file system. // An inode describes a single unnamed file.
// The inode disk structure holds metadata (the type, device numbers, // The inode disk structure holds metadata: the file's type,
// and data size) along with a list of blocks where the associated // its size, the number of links referring to it, and the
// data can be found. // list of blocks holding the file's content.
// //
// The inodes are laid out sequentially on disk immediately after // The inodes are laid out sequentially on disk immediately after
// the superblock. The kernel keeps a cache of the in-use // the superblock. Each inode has a number, indicating its
// on-disk structures to provide a place for synchronizing access // position on the disk.
// to inodes shared between multiple processes. //
// The kernel keeps a cache of in-use inodes in memory
// to provide a place for synchronizing access
// to inodes used by multiple processes. The cached
// inodes include book-keeping information that is
// not stored on disk: ip->ref and ip->flags.
// //
// ip->ref counts the number of pointer references to this cached // ip->ref counts the number of pointer references to this cached
// inode; references are typically kept in struct file and in proc->cwd. // inode; references are typically kept in struct file and in proc->cwd.
@ -114,11 +118,12 @@ bfree(int dev, uint b)
// //
// Processes are only allowed to read and write inode // Processes are only allowed to read and write inode
// metadata and contents when holding the inode's lock, // metadata and contents when holding the inode's lock,
// represented by the I_BUSY flag in the in-memory copy. // represented by the I_BUSY bit in ip->flags.
// Because inode locks are held during disk accesses, // Because inode locks are held during disk accesses,
// they are implemented using a flag rather than with // they are implemented using a flag rather than with
// spin locks. Callers are responsible for locking // spin locks. ilock() and iunlock() manipulate an
// inodes before passing them to routines in this file; leaving // inode's I_BUSY flag. Many routines in this file expect
// the caller to have already locked the inode; leaving
// this responsibility with the caller makes it possible for them // this responsibility with the caller makes it possible for them
// to create arbitrarily-sized atomic operations. // to create arbitrarily-sized atomic operations.
// //
@ -127,6 +132,19 @@ bfree(int dev, uint b)
// return pointers to *unlocked* inodes. It is the callers' // return pointers to *unlocked* inodes. It is the callers'
// responsibility to lock them before using them. A non-zero // responsibility to lock them before using them. A non-zero
// ip->ref keeps these unlocked inodes in the cache. // ip->ref keeps these unlocked inodes in the cache.
//
// In order for the file system code to look at an inode, the inode
// must pass through a number of states, with transitions
// driven by the indicated functions:
//
// * Allocated on disk, indicated by a non-zero type.
// ialloc() and iput().
// * Referenced in the cache, indicated by ip->ref > 0.
// iget() and iput().
// * Cached inode is valid, indicated by I_VALID.
// ilock() and iput().
// * Locked, indicated by I_BUSY.
// ilock() and iunlock().
struct { struct {
struct spinlock lock; struct spinlock lock;
@ -143,6 +161,7 @@ static struct inode* iget(uint dev, uint inum);
//PAGEBREAK! //PAGEBREAK!
// Allocate a new inode with the given type on device dev. // Allocate a new inode with the given type on device dev.
// A free inode has a type of zero.
struct inode* struct inode*
ialloc(uint dev, short type) ialloc(uint dev, short type)
{ {
@ -152,7 +171,8 @@ ialloc(uint dev, short type)
struct superblock sb; struct superblock sb;
readsb(dev, &sb); readsb(dev, &sb);
for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks
for(inum = 1; inum < sb.ninodes; inum++){
bp = bread(dev, IBLOCK(inum)); bp = bread(dev, IBLOCK(inum));
dip = (struct dinode*)bp->data + inum%IPB; dip = (struct dinode*)bp->data + inum%IPB;
if(dip->type == 0){ // a free inode if(dip->type == 0){ // a free inode

8
fs.h
View file

@ -1,8 +1,12 @@
// On-disk file system format. // On-disk file system format.
// Both the kernel and user programs use this header file. // Both the kernel and user programs use this header file.
// Block 0 is unused. Block 1 is super block. // Block 0 is unused.
// Inodes start at block 2. // Block 1 is super block.
// Blocks 2 through sb.ninodes/IPB hold inodes.
// Then free bitmap blocks holding sb.size bits.
// Then sb.nblocks data blocks.
// Then sb.nlog log blocks.
#define ROOTINO 1 // root i-number #define ROOTINO 1 // root i-number
#define BSIZE 512 // block size #define BSIZE 512 // block size

7
ide.c
View file

@ -93,7 +93,7 @@ ideintr(void)
{ {
struct buf *b; struct buf *b;
// Take first buffer off queue. // First queued buffer is the active request.
acquire(&idelock); acquire(&idelock);
if((b = idequeue) == 0){ if((b = idequeue) == 0){
release(&idelock); release(&idelock);
@ -134,11 +134,11 @@ iderw(struct buf *b)
if(b->dev != 0 && !havedisk1) if(b->dev != 0 && !havedisk1)
panic("iderw: ide disk 1 not present"); panic("iderw: ide disk 1 not present");
acquire(&idelock); // DOC:acquire-lock acquire(&idelock); //DOC: acquire-lock
// Append b to idequeue. // Append b to idequeue.
b->qnext = 0; b->qnext = 0;
for(pp=&idequeue; *pp; pp=&(*pp)->qnext) // DOC:insert-queue for(pp=&idequeue; *pp; pp=&(*pp)->qnext) //DOC: insert-queue
; ;
*pp = b; *pp = b;
@ -147,7 +147,6 @@ iderw(struct buf *b)
idestart(b); idestart(b);
// Wait for request to finish. // Wait for request to finish.
// Assuming will not sleep too long: ignore proc->killed.
while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
sleep(b, &idelock); sleep(b, &idelock);
} }

22
log.c
View file

@ -42,7 +42,7 @@ struct log {
struct spinlock lock; struct spinlock lock;
int start; int start;
int size; int size;
int intrans; int busy; // a transaction is active
int dev; int dev;
struct logheader lh; struct logheader lh;
}; };
@ -75,7 +75,7 @@ install_trans(void)
struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst
memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst
bwrite(dbuf); // flush dst to disk bwrite(dbuf); // write dst to disk
brelse(lbuf); brelse(lbuf);
brelse(dbuf); brelse(dbuf);
} }
@ -95,7 +95,9 @@ read_head(void)
brelse(buf); brelse(buf);
} }
// Write in-memory log header to disk, committing log entries till head // Write in-memory log header to disk.
// This is the true point at which the
// current transaction commits.
static void static void
write_head(void) write_head(void)
{ {
@ -123,10 +125,10 @@ void
begin_trans(void) begin_trans(void)
{ {
acquire(&log.lock); acquire(&log.lock);
while (log.intrans) { while (log.busy) {
sleep(&log, &log.lock); sleep(&log, &log.lock);
} }
log.intrans = 1; log.busy = 1;
release(&log.lock); release(&log.lock);
} }
@ -134,14 +136,14 @@ void
commit_trans(void) commit_trans(void)
{ {
if (log.lh.n > 0) { if (log.lh.n > 0) {
write_head(); // Causes all blocks till log.head to be committed write_head(); // Write header to disk -- the real commit
install_trans(); // Install all the transactions till head install_trans(); // Now install writes to home locations
log.lh.n = 0; log.lh.n = 0;
write_head(); // Reclaim log write_head(); // Erase the transaction from the log
} }
acquire(&log.lock); acquire(&log.lock);
log.intrans = 0; log.busy = 0;
wakeup(&log); wakeup(&log);
release(&log.lock); release(&log.lock);
} }
@ -161,7 +163,7 @@ log_write(struct buf *b)
if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
panic("too big a transaction"); panic("too big a transaction");
if (!log.intrans) if (!log.busy)
panic("write outside of trans"); panic("write outside of trans");
for (i = 0; i < log.lh.n; i++) { for (i = 0; i < log.lh.n; i++) {