xv6-cs450/fs.c

629 lines
14 KiB
C
Raw Normal View History

// File system implementation.  Four layers:
//   + Blocks: allocator for raw disk blocks.
//   + Files: inode allocator, reading, writing, metadata.
//   + Directories: inode with special contents (list of other inodes!)
//   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
//
// Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
//
// This file contains the low-level file system manipulation
// routines.  The (higher-level) system call implementations
// are in sysfile.c.
2007-08-20 20:23:52 +02:00
#include "types.h"
2007-08-28 01:26:33 +02:00
#include "defs.h"
#include "param.h"
2007-08-28 01:26:33 +02:00
#include "stat.h"
#include "mmu.h"
#include "proc.h"
#include "spinlock.h"
#include "buf.h"
#include "fs.h"
#include "fsvar.h"
#include "dev.h"
2007-08-20 20:23:52 +02:00
// Smaller of a and b.  This is a function-like macro, so whichever
// argument is selected is evaluated twice: do not pass expressions
// with side effects.
#define min(a, b) ((a) < (b) ? (a) : (b))
2007-08-21 21:58:55 +02:00
// Forward declaration: itrunc() is defined further down in this file
// but is called earlier, from iput().
static void itrunc(struct inode*);
2006-07-22 00:10:40 +02:00
// Read the super block.
// Copies sizeof(*sb) bytes from disk block 1 of device dev into *sb.
static void
readsb(int dev, struct superblock *sb)
{
  struct buf *b = bread(dev, 1);

  memmove(sb, b->data, sizeof(*sb));
  brelse(b);
}
// Zero a block.
// Fills all BSIZE bytes of block bno on device dev with zeros and
// writes the result back to disk.
static void
bzero(int dev, int bno)
{
  struct buf *b = bread(dev, bno);

  memset(b->data, 0, BSIZE);
  bwrite(b);
  brelse(b);
}
2007-08-20 20:23:52 +02:00
// Blocks.
2006-09-06 19:50:20 +02:00
// Allocate a disk block.
2006-09-06 19:27:19 +02:00
static uint
balloc(uint dev)
2006-08-09 03:09:36 +02:00
{
int b, bi, m;
2006-08-09 03:09:36 +02:00
struct buf *bp;
struct superblock sb;
2006-08-09 03:09:36 +02:00
bp = 0;
readsb(dev, &sb);
for(b = 0; b < sb.size; b += BPB){
bp = bread(dev, BBLOCK(b, sb.ninodes));
for(bi = 0; bi < BPB; bi++){
m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0){ // Is block free?
bp->data[bi/8] |= m; // Mark block in use on disk.
bwrite(bp);
brelse(bp);
return b + bi;
}
2006-08-09 03:09:36 +02:00
}
brelse(bp);
2006-08-09 03:09:36 +02:00
}
2007-08-10 18:52:31 +02:00
panic("balloc: out of blocks");
2006-08-09 03:09:36 +02:00
}
2006-09-07 16:28:12 +02:00
// Free a disk block.
2006-09-06 19:27:19 +02:00
static void
bfree(int dev, uint b)
{
struct buf *bp;
struct superblock sb;
int bi, m;
bzero(dev, b);
readsb(dev, &sb);
bp = bread(dev, BBLOCK(b, sb.ninodes));
bi = b % BPB;
m = 1 << (bi % 8);
2007-08-24 21:32:36 +02:00
if((bp->data[bi/8] & m) == 0)
panic("freeing free block");
bp->data[bi/8] &= ~m; // Mark block free on disk.
bwrite(bp);
brelse(bp);
}
2006-08-09 03:09:36 +02:00
2007-08-27 16:23:48 +02:00
// Inodes.
//
// An inode is a single, unnamed file in the file system.
// The inode disk structure holds metadata (the type, device numbers,
// and data size) along with a list of blocks where the associated
// data can be found.
//
// The inodes are laid out sequentially on disk immediately after
// the superblock.  The kernel keeps a cache of the in-use
// on-disk structures to provide a place for synchronizing access
// to inodes shared between multiple processes.
//
// ip->ref counts the number of pointer references to this cached
// inode; references are typically kept in struct file and in cp->cwd.
// When ip->ref falls to zero, the inode is no longer cached.
// It is an error to use an inode without holding a reference to it.
//
// Processes are only allowed to read and write inode
// metadata and contents when holding the inode's lock,
// represented by the I_BUSY flag in the in-memory copy.
// Because inode locks are held during disk accesses,
// they are implemented using a flag rather than with
// spin locks.  Callers are responsible for locking
// inodes before passing them to routines in this file; leaving
// this responsibility with the caller makes it possible for them
// to create arbitrarily-sized atomic operations.
//
// To give maximum control over locking to the callers,
// the routines in this file that return inode pointers
// return pointers to *unlocked* inodes.  It is the callers'
// responsibility to lock them before using them.  A non-zero
// ip->ref keeps these unlocked inodes in the cache.
2007-08-20 20:23:52 +02:00
// In-memory inode cache: a fixed table of NINODE slots plus the
// spin lock that the routines in this file acquire around updates
// to a slot's ref count and I_BUSY flag.
struct {
// Guards ref/flags bookkeeping of every slot below.
struct spinlock lock;
// Cache slots; iget() treats a slot with ref == 0 as free.
struct inode inode[NINODE];
} icache;
// Initialize the inode cache's spin lock, naming it "icache.lock".
void
iinit(void)
{
initlock(&icache.lock, "icache.lock");
}
2006-09-06 19:50:20 +02:00
// Find the inode with number inum on device dev
2007-08-27 16:31:50 +02:00
// and return the in-memory copy.
static struct inode*
iget(uint dev, uint inum)
{
2007-08-20 20:23:52 +02:00
struct inode *ip, *empty;
2007-08-20 20:23:52 +02:00
acquire(&icache.lock);
2007-08-20 20:23:52 +02:00
// Try for cached inode.
empty = 0;
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
2006-09-07 17:15:32 +02:00
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
ip->ref++;
2007-08-20 20:23:52 +02:00
release(&icache.lock);
return ip;
}
2007-08-20 20:23:52 +02:00
if(empty == 0 && ip->ref == 0) // Remember empty slot.
empty = ip;
}
2007-08-20 20:23:52 +02:00
// Allocate fresh inode.
if(empty == 0)
2007-08-08 11:50:46 +02:00
panic("iget: no inodes");
2007-08-20 20:23:52 +02:00
ip = empty;
ip->dev = dev;
ip->inum = inum;
ip->ref = 1;
ip->flags = 0;
2007-08-20 20:23:52 +02:00
release(&icache.lock);
return ip;
2007-08-20 20:23:52 +02:00
}
// Increment reference count for ip.
// Returns ip to enable ip = idup(ip1) idiom.
struct inode*
idup(struct inode *ip)
2007-08-20 20:23:52 +02:00
{
acquire(&icache.lock);
ip->ref++;
release(&icache.lock);
return ip;
}
2007-08-20 20:23:52 +02:00
// Lock the given inode.
void
ilock(struct inode *ip)
{
struct buf *bp;
struct dinode *dip;
if(ip == 0 || ip->ref < 1)
panic("ilock");
2007-08-20 20:23:52 +02:00
acquire(&icache.lock);
while(ip->flags & I_BUSY)
2007-08-20 20:23:52 +02:00
sleep(ip, &icache.lock);
ip->flags |= I_BUSY;
2007-08-20 20:23:52 +02:00
release(&icache.lock);
if(!(ip->flags & I_VALID)){
bp = bread(ip->dev, IBLOCK(ip->inum));
2007-08-28 06:13:24 +02:00
dip = (struct dinode*)bp->data + ip->inum%IPB;
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
ip->flags |= I_VALID;
if(ip->type == 0)
panic("ilock: no type");
}
2007-08-20 20:23:52 +02:00
}
// Unlock the given inode.
void
2007-08-20 20:23:52 +02:00
iunlock(struct inode *ip)
{
if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
2007-08-20 20:23:52 +02:00
panic("iunlock");
acquire(&icache.lock);
ip->flags &= ~I_BUSY;
2007-08-20 20:23:52 +02:00
wakeup(ip);
release(&icache.lock);
}
// Caller holds reference to unlocked ip. Drop reference.
2007-08-20 20:23:52 +02:00
void
iput(struct inode *ip)
2007-08-20 20:23:52 +02:00
{
acquire(&icache.lock);
2007-08-28 20:32:08 +02:00
if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
// inode is no longer used: truncate and free inode.
if(ip->flags & I_BUSY)
panic("iput busy");
ip->flags |= I_BUSY;
release(&icache.lock);
itrunc(ip);
ip->type = 0;
iupdate(ip);
acquire(&icache.lock);
ip->flags &= ~I_BUSY;
wakeup(ip);
}
ip->ref--;
release(&icache.lock);
}
2007-08-27 16:31:50 +02:00
// Common idiom: unlock, then put.
// Releases the inode lock with iunlock() and then drops the caller's
// reference with iput(), in that order.
void
iunlockput(struct inode *ip)
{
iunlock(ip);
iput(ip);
}
2007-08-25 00:17:54 +02:00
//PAGEBREAK!
2007-08-20 20:23:52 +02:00
// Allocate a new inode with the given type on device dev.
struct inode*
2006-08-08 20:07:37 +02:00
ialloc(uint dev, short type)
{
int inum;
struct buf *bp;
2007-08-10 18:52:31 +02:00
struct dinode *dip;
struct superblock sb;
2006-08-09 03:09:36 +02:00
readsb(dev, &sb);
2007-08-28 20:32:08 +02:00
for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks
2006-08-09 03:09:36 +02:00
bp = bread(dev, IBLOCK(inum));
2007-08-28 06:13:24 +02:00
dip = (struct dinode*)bp->data + inum%IPB;
2007-08-28 20:32:08 +02:00
if(dip->type == 0){ // a free inode
memset(dip, 0, sizeof(*dip));
dip->type = type;
bwrite(bp); // mark it allocated on the disk
brelse(bp);
return iget(dev, inum);
2006-08-08 20:07:37 +02:00
}
brelse(bp);
}
panic("ialloc: no inodes");
2006-08-08 20:07:37 +02:00
}
2007-08-20 20:23:52 +02:00
// Copy inode, which has changed, from memory to disk.
void
2007-08-20 20:23:52 +02:00
iupdate(struct inode *ip)
2006-07-22 00:10:40 +02:00
{
2007-08-20 20:23:52 +02:00
struct buf *bp;
struct dinode *dip;
2006-07-22 00:10:40 +02:00
2007-08-20 20:23:52 +02:00
bp = bread(ip->dev, IBLOCK(ip->inum));
2007-08-28 06:13:24 +02:00
dip = (struct dinode*)bp->data + ip->inum%IPB;
2007-08-20 20:23:52 +02:00
dip->type = ip->type;
dip->major = ip->major;
dip->minor = ip->minor;
dip->nlink = ip->nlink;
dip->size = ip->size;
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
bwrite(bp);
2007-08-20 20:23:52 +02:00
brelse(bp);
2006-07-22 00:10:40 +02:00
}
2007-08-25 00:17:54 +02:00
//PAGEBREAK!
2007-08-20 20:23:52 +02:00
// Inode contents
//
// The contents (data) associated with each inode are stored
// in a sequence of blocks on the disk.  The first NDIRECT blocks
// are listed in ip->addrs[].  The next NINDIRECT blocks are
// listed in the block ip->addrs[NDIRECT].
2006-09-07 16:28:12 +02:00
// Return the disk block address of the nth block in inode ip.
// If there is no such block, alloc controls whether one is allocated.
static uint
2007-08-20 20:23:52 +02:00
bmap(struct inode *ip, uint bn, int alloc)
{
2007-08-20 20:23:52 +02:00
uint addr, *a;
struct buf *bp;
2007-08-28 20:32:08 +02:00
if(bn < NDIRECT){
if((addr = ip->addrs[bn]) == 0){
2007-08-20 20:23:52 +02:00
if(!alloc)
return -1;
ip->addrs[bn] = addr = balloc(ip->dev);
}
return addr;
}
2007-08-20 20:23:52 +02:00
bn -= NDIRECT;
2007-08-28 20:32:08 +02:00
if(bn < NINDIRECT){
2007-08-20 20:23:52 +02:00
// Load indirect block, allocating if necessary.
if((addr = ip->addrs[NDIRECT]) == 0){
2007-08-20 20:23:52 +02:00
if(!alloc)
return -1;
ip->addrs[NDIRECT] = addr = balloc(ip->dev);
2007-08-20 20:23:52 +02:00
}
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
2007-08-28 20:32:08 +02:00
if((addr = a[bn]) == 0){
if(!alloc){
2007-08-20 20:23:52 +02:00
brelse(bp);
return -1;
}
a[bn] = addr = balloc(ip->dev);
bwrite(bp);
2007-08-20 20:23:52 +02:00
}
brelse(bp);
return addr;
}
panic("bmap: out of range");
}
2007-08-20 20:23:52 +02:00
// Truncate inode (discard contents).
// Only called after the last dirent referring
// to this inode has been erased on disk.
2007-08-21 21:58:55 +02:00
static void
2006-08-30 20:55:06 +02:00
itrunc(struct inode *ip)
{
int i, j;
2007-08-20 20:23:52 +02:00
struct buf *bp;
2007-08-10 18:52:31 +02:00
uint *a;
2007-08-28 20:32:08 +02:00
for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){
bfree(ip->dev, ip->addrs[i]);
ip->addrs[i] = 0;
}
}
2007-08-20 20:23:52 +02:00
if(ip->addrs[NDIRECT]){
bp = bread(ip->dev, ip->addrs[NDIRECT]);
2007-08-20 20:23:52 +02:00
a = (uint*)bp->data;
2007-08-28 20:32:08 +02:00
for(j = 0; j < NINDIRECT; j++){
2007-08-20 20:23:52 +02:00
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
bfree(ip->dev, ip->addrs[NDIRECT]);
ip->addrs[NDIRECT] = 0;
2006-08-30 20:55:06 +02:00
}
2007-08-20 20:23:52 +02:00
ip->size = 0;
iupdate(ip);
2006-08-15 17:53:46 +02:00
}
2006-09-07 16:28:12 +02:00
// Copy stat information from inode.
2006-08-12 06:33:50 +02:00
void
stati(struct inode *ip, struct stat *st)
{
2006-09-07 15:08:23 +02:00
st->dev = ip->dev;
st->ino = ip->inum;
st->type = ip->type;
st->nlink = ip->nlink;
st->size = ip->size;
2006-08-12 06:33:50 +02:00
}
//PAGEBREAK!
2006-09-07 16:28:12 +02:00
// Read data from inode.
2006-07-27 23:10:00 +02:00
int
readi(struct inode *ip, char *dst, uint off, uint n)
2006-07-27 23:10:00 +02:00
{
2007-08-20 20:23:52 +02:00
uint tot, m;
2006-07-27 23:10:00 +02:00
struct buf *bp;
2007-08-28 20:32:08 +02:00
if(ip->type == T_DEV){
2006-09-07 15:08:23 +02:00
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
return -1;
return devsw[ip->major].read(ip, dst, n);
}
2007-08-27 16:31:50 +02:00
if(off > ip->size || off + n < off)
2007-08-20 20:23:52 +02:00
return -1;
if(off + n > ip->size)
n = ip->size - off;
2006-07-27 23:10:00 +02:00
2007-08-28 20:32:08 +02:00
for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
2007-08-20 20:23:52 +02:00
bp = bread(ip->dev, bmap(ip, off/BSIZE, 0));
m = min(n - tot, BSIZE - off%BSIZE);
memmove(dst, bp->data + off%BSIZE, m);
brelse(bp);
}
2007-08-20 20:23:52 +02:00
return n;
}
// PAGEBREAK!
2006-09-07 16:28:12 +02:00
// Write data to inode.
int
2007-08-20 20:23:52 +02:00
writei(struct inode *ip, char *src, uint off, uint n)
{
2007-08-20 20:23:52 +02:00
uint tot, m;
2007-08-10 18:52:31 +02:00
struct buf *bp;
2007-08-28 20:32:08 +02:00
if(ip->type == T_DEV){
2006-09-07 15:08:23 +02:00
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
return -1;
return devsw[ip->major].write(ip, src, n);
2007-08-10 18:52:31 +02:00
}
2007-08-20 20:23:52 +02:00
if(off + n < off)
return -1;
if(off + n > MAXFILE*BSIZE)
n = MAXFILE*BSIZE - off;
2007-08-28 20:32:08 +02:00
for(tot=0; tot<n; tot+=m, off+=m, src+=m){
2007-08-20 20:23:52 +02:00
bp = bread(ip->dev, bmap(ip, off/BSIZE, 1));
m = min(n - tot, BSIZE - off%BSIZE);
memmove(bp->data + off%BSIZE, src, m);
bwrite(bp);
2007-08-10 18:52:31 +02:00
brelse(bp);
}
2007-08-20 20:23:52 +02:00
2007-08-28 20:32:08 +02:00
if(n > 0 && off > ip->size){
2007-08-20 20:23:52 +02:00
ip->size = off;
2007-08-10 18:52:31 +02:00
iupdate(ip);
}
2007-08-20 20:23:52 +02:00
return n;
}
//PAGEBREAK!
2007-08-20 20:23:52 +02:00
// Directories
2007-08-09 21:05:00 +02:00
int
2007-08-21 21:58:55 +02:00
namecmp(const char *s, const char *t)
{
2007-08-24 23:00:02 +02:00
return strncmp(s, t, DIRSIZ);
2007-08-21 21:58:55 +02:00
}
2007-08-09 21:05:00 +02:00
// Look for a directory entry in a directory.
// If found, set *poff to byte offset of entry.
2007-08-24 16:56:17 +02:00
// Caller must have already locked dp.
struct inode*
2007-08-21 21:58:55 +02:00
dirlookup(struct inode *dp, char *name, uint *poff)
2007-08-09 21:05:00 +02:00
{
uint off, inum;
2007-08-09 21:05:00 +02:00
struct buf *bp;
struct dirent *de;
if(dp->type != T_DIR)
2007-08-24 16:56:17 +02:00
panic("dirlookup not DIR");
2007-08-09 21:05:00 +02:00
for(off = 0; off < dp->size; off += BSIZE){
2007-08-20 20:23:52 +02:00
bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
2007-08-27 16:31:50 +02:00
for(de = (struct dirent*)bp->data;
de < (struct dirent*)(bp->data + BSIZE);
2007-08-09 21:05:00 +02:00
de++){
if(de->inum == 0)
continue;
2007-08-21 21:58:55 +02:00
if(namecmp(name, de->name) == 0){
2007-08-09 21:05:00 +02:00
// entry matches path element
2007-08-20 21:37:15 +02:00
if(poff)
*poff = off + (uchar*)de - bp->data;
inum = de->inum;
2007-08-09 21:05:00 +02:00
brelse(bp);
return iget(dp->dev, inum);
2007-08-09 21:05:00 +02:00
}
}
brelse(bp);
}
return 0;
2007-08-09 21:05:00 +02:00
}
2007-08-20 20:23:52 +02:00
// Write a new directory entry (name, ino) into the directory dp.
int
2007-08-21 21:58:55 +02:00
dirlink(struct inode *dp, char *name, uint ino)
2007-08-20 20:23:52 +02:00
{
2007-08-20 21:37:15 +02:00
int off;
2007-08-20 20:23:52 +02:00
struct dirent de;
struct inode *ip;
// Check that name is not present.
if((ip = dirlookup(dp, name, 0)) != 0){
iput(ip);
return -1;
}
2007-08-20 20:23:52 +02:00
// Look for an empty dirent.
for(off = 0; off < dp->size; off += sizeof(de)){
if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
2007-08-27 16:31:50 +02:00
panic("dirlink read");
2007-08-20 20:23:52 +02:00
if(de.inum == 0)
break;
}
2007-08-24 23:00:02 +02:00
strncpy(de.name, name, DIRSIZ);
2007-08-20 20:23:52 +02:00
de.inum = ino;
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
2007-08-27 16:31:50 +02:00
panic("dirlink");
return 0;
2007-08-20 20:23:52 +02:00
}
2007-08-25 00:17:54 +02:00
//PAGEBREAK!
2007-08-20 20:23:52 +02:00
// Paths
// Copy the next path element from path into name.
// Return a pointer to the element following the copied one.
// The returned path has no leading slashes,
// so the caller can check *path=='\0' to see if the name is the last one.
// If no name to remove, return 0.
2007-08-20 20:23:52 +02:00
//
// Examples:
// skipelem("a/bb/c", name) = "bb/c", setting name = "a"
2007-08-27 16:31:50 +02:00
// skipelem("///a//bb", name) = "bb", setting name = "a"
// skipelem("", name) = skipelem("////", name) = 0
2007-08-20 20:23:52 +02:00
//
static char*
2007-08-21 21:58:55 +02:00
skipelem(char *path, char *name)
2007-08-20 20:23:52 +02:00
{
2007-08-21 21:58:55 +02:00
char *s;
int len;
2007-08-20 20:23:52 +02:00
while(*path == '/')
path++;
if(*path == 0)
return 0;
2007-08-21 21:58:55 +02:00
s = path;
2007-08-20 20:23:52 +02:00
while(*path != '/' && *path != 0)
path++;
2007-08-21 21:58:55 +02:00
len = path - s;
if(len >= DIRSIZ)
memmove(name, s, DIRSIZ);
2007-08-28 20:37:41 +02:00
else {
2007-08-21 21:58:55 +02:00
memmove(name, s, len);
name[len] = 0;
}
2007-08-20 20:23:52 +02:00
while(*path == '/')
path++;
return path;
}
// Look up and return the inode for a path name.
2007-08-28 21:39:49 +02:00
// If parent != 0, return the inode for the parent and copy the final
// path element into name, which must have room for DIRSIZ bytes.
static struct inode*
2007-08-21 21:58:55 +02:00
_namei(char *path, int parent, char *name)
2006-07-22 00:10:40 +02:00
{
struct inode *ip, *next;
2007-08-09 21:05:00 +02:00
if(*path == '/')
2008-10-08 20:57:13 +02:00
ip = iget(ROOTDEV, ROOTINO);
else
ip = idup(cp->cwd);
2006-07-22 00:10:40 +02:00
2007-08-21 21:58:55 +02:00
while((path = skipelem(path, name)) != 0){
ilock(ip);
if(ip->type != T_DIR){
iunlockput(ip);
return 0;
}
2007-08-20 21:37:15 +02:00
if(parent && *path == '\0'){
// Stop one level early.
iunlock(ip);
return ip;
}
2007-08-27 16:31:50 +02:00
if((next = dirlookup(ip, name, 0)) == 0){
iunlockput(ip);
return 0;
}
iunlockput(ip);
ip = next;
2006-07-22 00:10:40 +02:00
}
2007-08-24 16:56:17 +02:00
if(parent){
iput(ip);
2007-08-09 21:05:00 +02:00
return 0;
2007-08-24 16:56:17 +02:00
}
return ip;
2006-07-22 00:10:40 +02:00
}
2006-08-08 20:07:37 +02:00
2007-08-27 16:31:50 +02:00
// Return the inode named by path, or 0 if the lookup fails.
// The final path element is copied into a scratch buffer that is
// discarded.
struct inode*
namei(char *path)
{
  char elem[DIRSIZ];
  struct inode *ip;

  ip = _namei(path, 0, elem);
  return ip;
}
2007-08-28 21:39:49 +02:00
2007-08-27 16:31:50 +02:00
// Return the inode of the directory containing the last element of
// path, copying that last element into name.  Calls _namei() with
// parent = 1, so name needs room for DIRSIZ bytes, as _namei()
// requires of its name argument.
struct inode*
nameiparent(char *path, char *name)
{
return _namei(path, 1, name);
}