From a5fbfe418abd9bdb876407a73b479cdc39046e9a Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Tue, 11 Oct 2011 06:41:37 -0400 Subject: [PATCH] clarify some FS comments --- bio.c | 16 ++++++++-------- file.h | 8 +++----- fs.c | 54 +++++++++++++++++++++++++++++++++++++----------------- fs.h | 8 ++++++-- ide.c | 7 +++---- log.c | 22 ++++++++++++---------- 6 files changed, 69 insertions(+), 46 deletions(-) diff --git a/bio.c b/bio.c index 6a3968b..88b0964 100644 --- a/bio.c +++ b/bio.c @@ -7,7 +7,7 @@ // // Interface: // * To get a buffer for a particular disk block, call bread. -// * After changing buffer data, call bwrite to flush it to disk. +// * After changing buffer data, call bwrite to write it to disk. // * When done with the buffer, call brelse. // * Do not use the buffer after calling brelse. // * Only one process at a time can use a buffer, @@ -16,8 +16,7 @@ // The implementation uses three state flags internally: // * B_BUSY: the block has been returned from bread // and has not been passed back to brelse. -// * B_VALID: the buffer data has been initialized -// with the associated disk block contents. +// * B_VALID: the buffer data has been read from the disk. // * B_DIRTY: the buffer data has been modified // and needs to be written to disk. @@ -58,7 +57,7 @@ binit(void) // Look through buffer cache for sector on device dev. // If not found, allocate fresh block. -// In either case, return locked buffer. +// In either case, return B_BUSY buffer. static struct buf* bget(uint dev, uint sector) { @@ -67,7 +66,7 @@ bget(uint dev, uint sector) acquire(&bcache.lock); loop: - // Try for cached block. + // Is the sector already cached? for(b = bcache.head.next; b != &bcache.head; b = b->next){ if(b->dev == dev && b->sector == sector){ if(!(b->flags & B_BUSY)){ @@ -80,7 +79,7 @@ bget(uint dev, uint sector) } } - // Allocate fresh block. + // Not cached; recycle some existing buffer. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ if((b->flags & B_BUSY) == 0){ b->dev = dev; @@ -105,7 +104,7 @@ bread(uint dev, uint sector) return b; } -// Write b's contents to disk. Must be locked. +// Write b's contents to disk. Must be B_BUSY. void bwrite(struct buf *b) { @@ -115,7 +114,8 @@ bwrite(struct buf *b) iderw(b); } -// Release the buffer b. +// Release a B_BUSY buffer. +// Move to the head of the MRU list. void brelse(struct buf *b) { diff --git a/file.h b/file.h index d123696..c02f76c 100644 --- a/file.h +++ b/file.h @@ -9,8 +9,7 @@ struct file { }; -// in-core file system types - +// in-memory copy of an inode struct inode { uint dev; // Device number uint inum; // Inode number @@ -24,12 +23,11 @@ struct inode { uint size; uint addrs[NDIRECT+1]; }; - #define I_BUSY 0x1 #define I_VALID 0x2 -// device implementations - +// table mapping major device number to +// device functions struct devsw { int (*read)(struct inode*, char*, int); int (*write)(struct inode*, char*, int); diff --git a/fs.c b/fs.c index b755c78..4b2f470 100644 --- a/fs.c +++ b/fs.c @@ -1,11 +1,10 @@ -// File system implementation. Four layers: +// File system implementation. Five layers: // + Blocks: allocator for raw disk blocks. +// + Log: crash recovery for multi-step updates. // + Files: inode allocator, reading, writing, metadata. // + Directories: inode with special contents (list of other inodes!) // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. // -// Disk layout is: superblock, inodes, block in-use bitmap, data blocks. -// // This file contains the low-level file system manipulation // routines. The (higher-level) system call implementations // are in sysfile.c. @@ -61,10 +60,10 @@ balloc(uint dev) readsb(dev, &sb); for(b = 0; b < sb.size; b += BPB){ bp = bread(dev, BBLOCK(b, sb.ninodes)); - for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){ + for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0){ // Is block free? - bp->data[bi/8] |= m; // Mark block in use on disk. + bp->data[bi/8] |= m; // Mark block in use. log_write(bp); brelse(bp); bzero(dev, b + bi); @@ -90,22 +89,27 @@ bfree(int dev, uint b) m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0) panic("freeing free block"); - bp->data[bi/8] &= ~m; // Mark block free on disk. + bp->data[bi/8] &= ~m; log_write(bp); brelse(bp); } // Inodes. // -// An inode is a single, unnamed file in the file system. -// The inode disk structure holds metadata (the type, device numbers, -// and data size) along with a list of blocks where the associated -// data can be found. +// An inode describes a single unnamed file. +// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. // // The inodes are laid out sequentially on disk immediately after -// the superblock. The kernel keeps a cache of the in-use -// on-disk structures to provide a place for synchronizing access -// to inodes shared between multiple processes. +// the superblock. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->flags. // // ip->ref counts the number of pointer references to this cached // inode; references are typically kept in struct file and in proc->cwd. @@ -114,11 +118,12 @@ bfree(int dev, uint b) // // Processes are only allowed to read and write inode // metadata and contents when holding the inode's lock, -// represented by the I_BUSY flag in the in-memory copy. +// represented by the I_BUSY bit in ip->flags. // Because inode locks are held during disk accesses, // they are implemented using a flag rather than with -// spin locks. Callers are responsible for locking -// inodes before passing them to routines in this file; leaving +// spin locks. ilock() and iunlock() manipulate an +// inode's I_BUSY flag. Many routines in this file expect +// the caller to have already locked the inode; leaving // this responsibility with the caller makes it possible for them // to create arbitrarily-sized atomic operations. // @@ -127,6 +132,19 @@ bfree(int dev, uint b) // return pointers to *unlocked* inodes. It is the callers' // responsibility to lock them before using them. A non-zero // ip->ref keeps these unlocked inodes in the cache. +// +// In order for the file system code to look at an inode, the inode +// must pass through a number of states, with transitions +// driven by the indicated functions: +// +// * Allocated on disk, indicated by a non-zero type. +// ialloc() and iput(). +// * Referenced in the cache, indicated by ip->ref > 0. +// iget() and iput(). +// * Cached inode is valid, indicated by I_VALID. +// ilock() and iput(). +// * Locked, indicated by I_BUSY. +// ilock() and iunlock(). struct { struct spinlock lock; @@ -143,6 +161,7 @@ static struct inode* iget(uint dev, uint inum); //PAGEBREAK! // Allocate a new inode with the given type on device dev. +// A free inode has a type of zero. struct inode* ialloc(uint dev, short type) { @@ -152,7 +171,8 @@ ialloc(uint dev, short type) struct superblock sb; readsb(dev, &sb); - for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks + + for(inum = 1; inum < sb.ninodes; inum++){ bp = bread(dev, IBLOCK(inum)); dip = (struct dinode*)bp->data + inum%IPB; if(dip->type == 0){ // a free inode diff --git a/fs.h b/fs.h index 85efa78..f191d43 100644 --- a/fs.h +++ b/fs.h @@ -1,8 +1,12 @@ // On-disk file system format. // Both the kernel and user programs use this header file. -// Block 0 is unused. Block 1 is super block. -// Inodes start at block 2. +// Block 0 is unused. +// Block 1 is super block. +// Blocks 2 through sb.ninodes/IPB hold inodes. +// Then free bitmap blocks holding sb.size bits. +// Then sb.nblocks data blocks. +// Then sb.nlog log blocks. #define ROOTINO 1 // root i-number #define BSIZE 512 // block size diff --git a/ide.c b/ide.c index 4165831..7946f94 100644 --- a/ide.c +++ b/ide.c @@ -93,7 +93,7 @@ ideintr(void) { struct buf *b; - // Take first buffer off queue. + // First queued buffer is the active request. acquire(&idelock); if((b = idequeue) == 0){ release(&idelock); @@ -134,11 +134,11 @@ iderw(struct buf *b) if(b->dev != 0 && !havedisk1) panic("iderw: ide disk 1 not present"); - acquire(&idelock); // DOC:acquire-lock + acquire(&idelock); //DOC: acquire-lock // Append b to idequeue. b->qnext = 0; - for(pp=&idequeue; *pp; pp=&(*pp)->qnext) // DOC:insert-queue + for(pp=&idequeue; *pp; pp=&(*pp)->qnext) //DOC: insert-queue ; *pp = b; @@ -147,7 +147,6 @@ iderw(struct buf *b) idestart(b); // Wait for request to finish. - // Assuming will not sleep too long: ignore proc->killed. while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ sleep(b, &idelock); } diff --git a/log.c b/log.c index 832732c..5b827de 100644 --- a/log.c +++ b/log.c @@ -42,7 +42,7 @@ struct log { struct spinlock lock; int start; int size; - int intrans; + int busy; // a transaction is active int dev; struct logheader lh; }; @@ -75,7 +75,7 @@ install_trans(void) struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst - bwrite(dbuf); // flush dst to disk + bwrite(dbuf); // write dst to disk brelse(lbuf); brelse(dbuf); } @@ -95,7 +95,9 @@ read_head(void) brelse(buf); } -// Write in-memory log header to disk, committing log entries till head +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. static void write_head(void) { @@ -123,10 +125,10 @@ void begin_trans(void) { acquire(&log.lock); - while (log.intrans) { + while (log.busy) { sleep(&log, &log.lock); } - log.intrans = 1; + log.busy = 1; release(&log.lock); } @@ -134,14 +136,14 @@ void commit_trans(void) { if (log.lh.n > 0) { - write_head(); // Causes all blocks till log.head to be commited - install_trans(); // Install all the transactions till head + write_head(); // Write header to disk -- the real commit + install_trans(); // Now install writes to home locations log.lh.n = 0; - write_head(); // Reclaim log + write_head(); // Erase the transaction from the log } acquire(&log.lock); - log.intrans = 0; + log.busy = 0; wakeup(&log); release(&log.lock); } @@ -161,7 +163,7 @@ log_write(struct buf *b) if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); - if (!log.intrans) + if (!log.busy) panic("write outside of trans"); for (i = 0; i < log.lh.n; i++) {