From 13a96baefc0ff5d8262c4bc8c797bee4b157443c Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Wed, 27 Jul 2011 20:35:46 -0400 Subject: [PATCH] Dirt simple logging Passes usertests and stressfs Seems to recover correctly in a number of simple cases --- Makefile | 1 + defs.h | 8 +++ fs.c | 12 ++-- fs.h | 1 + initcode.S | 3 + log.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 2 +- mkfs.c | 13 +++-- param.h | 2 + syscall.c | 49 ++++++++++------ syscall.h | 36 ++++++------ 11 files changed, 244 insertions(+), 47 deletions(-) create mode 100644 log.c diff --git a/Makefile b/Makefile index 3487aa4..f67c88c 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,7 @@ OBJS = \ uart.o\ vectors.o\ vm.o\ + log.o\ # Cross-compiling (e.g., on Mac OS X) #TOOLPREFIX = i386-jos-elf- diff --git a/defs.h b/defs.h index 8ea46d6..bbe4ae4 100644 --- a/defs.h +++ b/defs.h @@ -6,6 +6,7 @@ struct pipe; struct proc; struct spinlock; struct stat; +struct superblock; // bio.c void binit(void); @@ -32,6 +33,7 @@ int filestat(struct file*, struct stat*); int filewrite(struct file*, char*, int n); // fs.c +void readsb(int dev, struct superblock *sb); int dirlink(struct inode*, char*, uint); struct inode* dirlookup(struct inode*, char*, uint*); struct inode* ialloc(uint, short); @@ -75,6 +77,12 @@ void lapicinit(int); void lapicstartap(uchar, uint); void microdelay(int); +// log.c +void initlog(void); +void log_write(struct buf*); +void begin_trans(); +void commit_trans(); + // mp.c extern int ismp; int mpbcpu(void); diff --git a/fs.c b/fs.c index 7c6d904..a414b65 100644 --- a/fs.c +++ b/fs.c @@ -25,7 +25,7 @@ static void itrunc(struct inode*); // Read the super block. -static void +void readsb(int dev, struct superblock *sb) { struct buf *bp; @@ -65,7 +65,7 @@ balloc(uint dev) m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0){ // Is block free? bp->data[bi/8] |= m; // Mark block in use on disk. - bwrite(bp); + log_write(bp); brelse(bp); return b + bi; } @@ -92,7 +92,7 @@ bfree(int dev, uint b) if((bp->data[bi/8] & m) == 0) panic("freeing free block"); bp->data[bi/8] &= ~m; // Mark block free on disk. - bwrite(bp); + log_write(bp); brelse(bp); } @@ -159,7 +159,7 @@ ialloc(uint dev, short type) if(dip->type == 0){ // a free inode memset(dip, 0, sizeof(*dip)); dip->type = type; - bwrite(bp); // mark it allocated on the disk + log_write(bp); // mark it allocated on the disk brelse(bp); return iget(dev, inum); } @@ -183,7 +183,7 @@ iupdate(struct inode *ip) dip->nlink = ip->nlink; dip->size = ip->size; memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); - bwrite(bp); + log_write(bp); brelse(bp); } @@ -339,7 +339,7 @@ bmap(struct inode *ip, uint bn) a = (uint*)bp->data; if((addr = a[bn]) == 0){ a[bn] = addr = balloc(ip->dev); - bwrite(bp); + log_write(bp); } brelse(bp); return addr; diff --git a/fs.h b/fs.h index 1e6137b..c9e34bf 100644 --- a/fs.h +++ b/fs.h @@ -13,6 +13,7 @@ struct superblock { uint size; // Size of file system image (blocks) uint nblocks; // Number of data blocks uint ninodes; // Number of inodes. + uint nlog; // Number of log blocks }; #define NDIRECT 12 diff --git a/initcode.S b/initcode.S index 41e84f4..d86660a 100644 --- a/initcode.S +++ b/initcode.S @@ -3,9 +3,12 @@ #include "syscall.h" #include "traps.h" + # exec(init, argv) .globl start start: + movl $SYS_init, %eax + int $T_SYSCALL pushl $argv pushl $init pushl $0 // where caller pc would be diff --git a/log.c b/log.c new file mode 100644 index 0000000..72a0367 --- /dev/null +++ b/log.c @@ -0,0 +1,164 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "spinlock.h" +#include "fs.h" +#include "buf.h" + +// Dirt simple "logging" supporting only one transaction. All file system calls +// that potentially write a block should be wrapped in begin_trans and commit_trans, +// so that there is never more than one transaction. This serializes all file system +// operations that potentially write, but simplifies recovery (only the last +// one transaction to recover) and concurrency (don't have to worry about reading a modified +// block from a transaction that hasn't committed yet). + +// The header of the log. If head == 0, there are no log entries. All entries till head +// are committed. sector[] records the home sector for each block in the log +// (i.e., physical logging). +struct logheader { + int head; + int sector[LOGSIZE]; +}; + +struct { + struct spinlock lock; + int start; + int size; + int intrans; + int dev; + struct logheader lh; +} log; + +static void recover_from_log(void); + +void +initlog(void) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(ROOTDEV, &sb); + log.start = sb.size - sb.nlog; + log.size = sb.nlog; + log.dev = ROOTDEV; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + if (log.lh.head > 0) + cprintf("install_trans %d\n", log.lh.head); + for (tail = 0; tail < log.lh.head; tail++) { + cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]); + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read i'th block from log + struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst block + memmove(dbuf->data, lbuf->data, BSIZE); + bwrite(dbuf); + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.head = lh->head; + for (i = 0; i < log.lh.head; i++) { + log.lh.sector[i] = lh->sector[i]; + } + brelse(buf); + if (log.lh.head > 0) + cprintf("read_head: %d\n", log.lh.head); +} + +// Write the in-memory log header to disk, committing log entries till head +static void +write_head(void) +{ + if (log.lh.head > 0) + cprintf("write_head: %d\n", log.lh.head); + + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->head = log.lh.head; + for (i = 0; i < log.lh.head; i++) { + hb->sector[i] = log.lh.sector[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // Install all transactions till head + log.lh.head = 0; + write_head(); // Reclaim log +} + +void +begin_trans(void) +{ + acquire(&log.lock); + while (log.intrans) { + sleep(&log, &log.lock); + } + log.intrans = 1; + release(&log.lock); +} + +void +commit_trans(void) +{ + write_head(); // This causes all blocks till log.head to be commited + install_trans(); // Install all the transactions till head + log.lh.head = 0; + write_head(); // Reclaim log + + acquire(&log.lock); + log.intrans = 0; + wakeup(&log); + release(&log.lock); +} + +// Write buffer into the log at log.head and record the block number log.lh.entry, but +// don't write the log header (which would commit the write). +void +log_write(struct buf *b) +{ + int i; + + if (log.lh.head >= LOGSIZE) + panic("too big a transaction"); + if (!log.intrans) + panic("write outside of trans"); + + cprintf("log_write: %d %d\n", b->sector, log.lh.head); + + for (i = 0; i < log.lh.head; i++) { + if (log.lh.sector[i] == b->sector) // log absorbtion? + break; + } + log.lh.sector[i] = b->sector; + struct buf *lbuf = bread(b->dev, log.start+i+1); + memmove(lbuf->data, b->data, BSIZE); + bwrite(lbuf); + brelse(lbuf); + if (i == log.lh.head) + log.lh.head++; +} diff --git a/main.c b/main.c index e6d81f3..a27c4ff 100644 --- a/main.c +++ b/main.c @@ -20,7 +20,7 @@ main(void) lapicinit(mpbcpu()); seginit(); // set up segments kinit(); // initialize memory allocator - jmpkstack(); // call mainc() on a properly-allocated stack + jmpkstack(); // call mainc() on a properly-allocated stack } void diff --git a/mkfs.c b/mkfs.c index 20b9649..f015edd 100644 --- a/mkfs.c +++ b/mkfs.c @@ -9,8 +9,10 @@ #include "types.h" #include "fs.h" #include "stat.h" +#include "param.h" -int nblocks = 995; +int nblocks = 985; +int nlog = LOGSIZE; int ninodes = 200; int size = 1024; @@ -79,17 +81,18 @@ main(int argc, char *argv[]) sb.size = xint(size); sb.nblocks = xint(nblocks); // so whole disk is size sectors sb.ninodes = xint(ninodes); + sb.nlog = xint(nlog); bitblocks = size/(512*8) + 1; usedblocks = ninodes / IPB + 3 + bitblocks; freeblock = usedblocks; - printf("used %d (bit %d ninode %zu) free %u total %d\n", usedblocks, - bitblocks, ninodes/IPB + 1, freeblock, nblocks+usedblocks); + printf("used %d (bit %d ninode %zu) free %u log %u total %d\n", usedblocks, + bitblocks, ninodes/IPB + 1, freeblock, nlog, nblocks+usedblocks+nlog); - assert(nblocks + usedblocks == size); + assert(nblocks + usedblocks + nlog == size); - for(i = 0; i < nblocks + usedblocks; i++) + for(i = 0; i < nblocks + usedblocks + nlog; i++) wsect(i, zeroes); memset(buf, 0, sizeof(buf)); diff --git a/param.h b/param.h index 70f88e8..ab1b9fe 100644 --- a/param.h +++ b/param.h @@ -10,3 +10,5 @@ #define USERTOP 0xA0000 // end of user address space #define PHYSTOP 0x1000000 // use phys mem up to here as free pool #define MAXARG 32 // max exec arguments +#define LOGSIZE 10 // size of log + diff --git a/syscall.c b/syscall.c index f6550a1..ce50a59 100644 --- a/syscall.c +++ b/syscall.c @@ -98,39 +98,52 @@ extern int sys_wait(void); extern int sys_write(void); extern int sys_uptime(void); +int +sys_init(void) +{ + initlog(); + return 0; +} + static int (*syscalls[])(void) = { -[SYS_chdir] sys_chdir, -[SYS_close] sys_close, -[SYS_dup] sys_dup, -[SYS_exec] sys_exec, -[SYS_exit] sys_exit, +[SYS_init] sys_init, [SYS_fork] sys_fork, -[SYS_fstat] sys_fstat, -[SYS_getpid] sys_getpid, -[SYS_kill] sys_kill, -[SYS_link] sys_link, -[SYS_mkdir] sys_mkdir, -[SYS_mknod] sys_mknod, -[SYS_open] sys_open, +[SYS_exit] sys_exit, +[SYS_wait] sys_wait, [SYS_pipe] sys_pipe, [SYS_read] sys_read, +[SYS_kill] sys_kill, +[SYS_exec] sys_exec, +[SYS_fstat] sys_fstat, +[SYS_chdir] sys_chdir, +[SYS_dup] sys_dup, +[SYS_getpid] sys_getpid, [SYS_sbrk] sys_sbrk, [SYS_sleep] sys_sleep, -[SYS_unlink] sys_unlink, -[SYS_wait] sys_wait, -[SYS_write] sys_write, [SYS_uptime] sys_uptime, +// File system calls that are run in a transaction: +[SYS_open] sys_open, +[SYS_write] sys_write, +[SYS_mknod] sys_mknod, +[SYS_unlink] sys_unlink, +[SYS_link] sys_link, +[SYS_mkdir] sys_mkdir, +[SYS_close] sys_close, }; void syscall(void) { int num; - + num = proc->tf->eax; - if(num >= 0 && num < NELEM(syscalls) && syscalls[num]) + if(num >= 0 && num < SYS_open && syscalls[num]) { proc->tf->eax = syscalls[num](); - else { + } else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) { + begin_trans(); + proc->tf->eax = syscalls[num](); + commit_trans(); + } else { cprintf("%d %s: unknown sys call %d\n", proc->pid, proc->name, num); proc->tf->eax = -1; diff --git a/syscall.h b/syscall.h index 3a0fbca..e9e43a2 100644 --- a/syscall.h +++ b/syscall.h @@ -1,22 +1,24 @@ // System call numbers +#define SYS_init 0 #define SYS_fork 1 #define SYS_exit 2 #define SYS_wait 3 #define SYS_pipe 4 -#define SYS_write 5 -#define SYS_read 6 -#define SYS_close 7 -#define SYS_kill 8 -#define SYS_exec 9 -#define SYS_open 10 -#define SYS_mknod 11 -#define SYS_unlink 12 -#define SYS_fstat 13 -#define SYS_link 14 -#define SYS_mkdir 15 -#define SYS_chdir 16 -#define SYS_dup 17 -#define SYS_getpid 18 -#define SYS_sbrk 19 -#define SYS_sleep 20 -#define SYS_uptime 21 +#define SYS_read 5 +#define SYS_kill 6 +#define SYS_exec 7 +#define SYS_fstat 8 +#define SYS_chdir 9 +#define SYS_dup 10 +#define SYS_getpid 11 +#define SYS_sbrk 12 +#define SYS_sleep 13 +#define SYS_uptime 14 + +#define SYS_open 15 +#define SYS_write 16 +#define SYS_mknod 17 +#define SYS_unlink 18 +#define SYS_link 19 +#define SYS_mkdir 20 +#define SYS_close 21