diff --git a/file.c b/file.c index e10b824..7101a50 100644 --- a/file.c +++ b/file.c @@ -67,8 +67,11 @@ fileclose(struct file *f) if(ff.type == FD_PIPE) pipeclose(ff.pipe, ff.writable); - else if(ff.type == FD_INODE) + else if(ff.type == FD_INODE){ + begin_trans(); iput(ff.ip); + commit_trans(); + } } // Get metadata about file f. @@ -116,10 +119,30 @@ filewrite(struct file *f, char *addr, int n) return pipewrite(f->pipe, addr, n); if(f->type == FD_INODE){ ilock(f->ip); - if((r = writei(f->ip, addr, f->off, n)) > 0) + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since writei() + // might be writing a device like the console. + int max = ((LOGSIZE-1-1-2) / 2) * 512; + int i = 0; + while(i < n){ + int n1 = n - i; + if(n1 > max) + n1 = max; + begin_trans(); + r = writei(f->ip, addr + i, f->off, n1); + commit_trans(); + if(r < 0) + break; + if(r != n1) + panic("short filewrite"); f->off += r; + i += r; + } iunlock(f->ip); - return r; + return i == n ? n : -1; } panic("filewrite"); } diff --git a/fs.c b/fs.c index a414b65..a76788b 100644 --- a/fs.c +++ b/fs.c @@ -437,13 +437,13 @@ writei(struct inode *ip, char *src, uint off, uint n) if(off > ip->size || off + n < off) return -1; if(off + n > MAXFILE*BSIZE) - n = MAXFILE*BSIZE - off; + return -1; for(tot=0; tot<n; tot+=m, off+=m, src+=m){ bp = bread(ip->dev, bmap(ip, off/BSIZE)); m = min(n - tot, BSIZE - off%BSIZE); memmove(bp->data + off%BSIZE, src, m); - bwrite(bp); + log_write(bp); brelse(bp); } diff --git a/log.c b/log.c index 72a0367..db36ba9 100644 --- a/log.c +++ b/log.c @@ -8,18 +8,36 @@ #include "fs.h" #include "buf.h" -// Dirt simple "logging" supporting only one transaction. All file system calls -// that potentially write a block should be wrapped in begin_trans and commit_trans, -// so that there is never more than one transaction. 
This serializes all file system -// operations that potentially write, but simplifies recovery (only the last -// one transaction to recover) and concurrency (don't have to worry about reading a modified -// block from a transaction that hasn't committed yet). +// Simple logging. Each system call that might write the file system +// should be surrounded with begin_trans() and commit_trans() calls. +// +// The log holds at most one transaction at a time. Commit forces +// the log (with commit record) to disk, then installs the affected +// blocks to disk, then erases the log. begin_trans() ensures that +// only one system call can be in a transaction; others must wait. +// +// Allowing only one transaction at a time means that the file +// system code doesn't have to worry about the possibility of +// one transaction reading a block that another one has modified, +// for example an i-node block. +// +// Read-only system calls don't need to use transactions, though +// this means that they may observe uncommitted data. I-node +// and buffer locks prevent read-only calls from seeing inconsistent data. +// +// The log is a physical re-do log containing disk blocks. +// The on-disk log format: +// header block, containing sector #s for block A, B, C, ... +// block A +// block B +// block C +// ... +// Log appends are synchronous. -// The header of the log. If head == 0, there are no log entries. All entries till head -// are committed. sector[] records the home sector for each block in the log -// (i.e., physical logging). +// Contents of the header block, used for both the on-disk header block +// and to keep track in memory of logged sector #s before commit. 
struct logheader { - int head; + int n; int sector[LOGSIZE]; }; @@ -55,10 +73,10 @@ install_trans(void) { int tail; - if (log.lh.head > 0) - cprintf("install_trans %d\n", log.lh.head); - for (tail = 0; tail < log.lh.head; tail++) { - cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]); + //if (log.lh.n > 0) + // cprintf("install_trans %d\n", log.lh.n); + for (tail = 0; tail < log.lh.n; tail++) { + // cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]); struct buf *lbuf = bread(log.dev, log.start+tail+1); // read i'th block from log struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst block memmove(dbuf->data, lbuf->data, BSIZE); @@ -75,27 +93,27 @@ read_head(void) struct buf *buf = bread(log.dev, log.start); struct logheader *lh = (struct logheader *) (buf->data); int i; - log.lh.head = lh->head; - for (i = 0; i < log.lh.head; i++) { + log.lh.n = lh->n; + for (i = 0; i < log.lh.n; i++) { log.lh.sector[i] = lh->sector[i]; } brelse(buf); - if (log.lh.head > 0) - cprintf("read_head: %d\n", log.lh.head); + //if (log.lh.n > 0) + // cprintf("read_head: %d\n", log.lh.n); } // Write the in-memory log header to disk, committing log entries till head static void write_head(void) { - if (log.lh.head > 0) - cprintf("write_head: %d\n", log.lh.head); + // if (log.lh.n > 0) + // cprintf("write_head: %d\n", log.lh.n); struct buf *buf = bread(log.dev, log.start); struct logheader *hb = (struct logheader *) (buf->data); int i; - hb->head = log.lh.head; - for (i = 0; i < log.lh.head; i++) { + hb->n = log.lh.n; + for (i = 0; i < log.lh.n; i++) { hb->sector[i] = log.lh.sector[i]; } bwrite(buf); @@ -107,7 +125,7 @@ recover_from_log(void) { read_head(); install_trans(); // Install all transactions till head - log.lh.head = 0; + log.lh.n = 0; write_head(); // Reclaim log } @@ -127,7 +145,7 @@ commit_trans(void) { write_head(); // This causes all blocks till log.head to be commited install_trans(); // Install all the transactions till head 
- log.lh.head = 0; + log.lh.n = 0; write_head(); // Reclaim log acquire(&log.lock); @@ -136,21 +154,27 @@ commit_trans(void) release(&log.lock); } -// Write buffer into the log at log.head and record the block number log.lh.entry, but -// don't write the log header (which would commit the write). +// Caller has modified b->data and is done with the buffer. +// Append the block to the log and record the block number, +// but don't write the log header (which would commit the write). +// log_write() replaces bwrite(); a typical use is: +// bp = bread(...) +// modify bp->data[] +// log_write(bp) +// brelse(bp) void log_write(struct buf *b) { int i; - if (log.lh.head >= LOGSIZE) + if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); if (!log.intrans) panic("write outside of trans"); - cprintf("log_write: %d %d\n", b->sector, log.lh.head); + // cprintf("log_write: %d %d\n", b->sector, log.lh.n); - for (i = 0; i < log.lh.head; i++) { + for (i = 0; i < log.lh.n; i++) { if (log.lh.sector[i] == b->sector) // log absorbtion? 
break; } @@ -159,6 +183,6 @@ log_write(struct buf *b) memmove(lbuf->data, b->data, BSIZE); bwrite(lbuf); brelse(lbuf); - if (i == log.lh.head) - log.lh.head++; + if (i == log.lh.n) + log.lh.n++; } diff --git a/param.h b/param.h index 03c05f9..b6f6f46 100644 --- a/param.h +++ b/param.h @@ -8,5 +8,5 @@ #define NDEV 10 // maximum major device number #define ROOTDEV 1 // device number of file system root disk #define MAXARG 32 // max exec arguments -#define LOGSIZE 10 // size of log +#define LOGSIZE 10 // max data sectors in on-disk log diff --git a/syscall.c b/syscall.c index b848716..71c369c 100644 --- a/syscall.c +++ b/syscall.c @@ -141,9 +141,7 @@ syscall(void) if(num >= 0 && num < SYS_open && syscalls[num]) { proc->tf->eax = syscalls[num](); } else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) { - begin_trans(); proc->tf->eax = syscalls[num](); - commit_trans(); } else { cprintf("%d %s: unknown sys call %d\n", proc->pid, proc->name, num); diff --git a/sysfile.c b/sysfile.c index 4235660..ca54013 100644 --- a/sysfile.c +++ b/sysfile.c @@ -121,6 +121,9 @@ sys_link(void) iunlockput(ip); return -1; } + + begin_trans(); + ip->nlink++; iupdate(ip); iunlock(ip); @@ -134,6 +137,9 @@ sys_link(void) } iunlockput(dp); iput(ip); + + commit_trans(); + return 0; bad: @@ -141,6 +147,7 @@ bad: ip->nlink--; iupdate(ip); iunlockput(ip); + commit_trans(); return -1; } @@ -195,6 +202,8 @@ sys_unlink(void) return -1; } + begin_trans(); + memset(&de, 0, sizeof(de)); if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) panic("unlink: writei"); @@ -207,6 +216,9 @@ sys_unlink(void) ip->nlink--; iupdate(ip); iunlockput(ip); + + commit_trans(); + return 0; } @@ -251,6 +263,7 @@ create(char *path, short type, short major, short minor) panic("create: dirlink"); iunlockput(dp); + return ip; } @@ -265,7 +278,10 @@ sys_open(void) if(argstr(0, &path) < 0 || argint(1, &omode) < 0) return -1; if(omode & O_CREATE){ - if((ip = create(path, T_FILE, 0, 0)) == 0) + begin_trans(); + 
ip = create(path, T_FILE, 0, 0); + commit_trans(); + if(ip == 0) return -1; } else { if((ip = namei(path)) == 0) @@ -299,9 +315,13 @@ sys_mkdir(void) char *path; struct inode *ip; - if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0) + begin_trans(); + if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ + commit_trans(); return -1; + } iunlockput(ip); + commit_trans(); return 0; } @@ -313,12 +333,16 @@ sys_mknod(void) int len; int major, minor; + begin_trans(); if((len=argstr(0, &path)) < 0 || argint(1, &major) < 0 || argint(2, &minor) < 0 || - (ip = create(path, T_DEV, major, minor)) == 0) + (ip = create(path, T_DEV, major, minor)) == 0){ + commit_trans(); return -1; + } iunlockput(ip); + commit_trans(); return 0; } diff --git a/usertests.c b/usertests.c index 3bffadb..ba648a7 100644 --- a/usertests.c +++ b/usertests.c @@ -7,7 +7,7 @@ #include "traps.h" #include "memlayout.h" -char buf[2048]; +char buf[8192]; char name[3]; char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 }; int stdout = 1; @@ -968,6 +968,36 @@ subdir(void) printf(1, "subdir ok\n"); } +// test writes that are larger than the log. +void +bigwrite(void) +{ + int fd, sz; + + printf(1, "bigwrite test\n"); + + unlink("bigwrite"); + for(sz = 499; sz < 12*512; sz += 471){ + fd = open("bigwrite", O_CREATE | O_RDWR); + if(fd < 0){ + printf(1, "cannot create bigwrite\n"); + exit(); + } + int i; + for(i = 0; i < 2; i++){ + int cc = write(fd, buf, sz); + if(cc != sz){ + printf(1, "write(%d) ret %d\n", sz, cc); + exit(); + } + } + close(fd); + unlink("bigwrite"); + } + + printf(1, "bigwrite ok\n"); +} + void bigfile(void) { @@ -1467,6 +1497,7 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + bigwrite(); bigargtest(); bsstest(); sbrktest();