rtm 2006-06-12 15:22:12 +00:00
commit 55e95b16db
18 changed files with 1505 additions and 0 deletions

30
Makefile Normal file

@ -0,0 +1,30 @@
OBJS = main.o console.o string.o kalloc.o proc.o trapasm.o
CC = i386-jos-elf-gcc
LD = i386-jos-elf-ld
OBJCOPY = i386-jos-elf-objcopy
OBJDUMP = i386-jos-elf-objdump
xv6.img : bootblock kernel
	dd if=/dev/zero of=xv6.img count=10000
	dd if=bootblock of=xv6.img conv=notrunc
	dd if=kernel of=xv6.img seek=1 conv=notrunc

bootblock : bootasm.S bootmain.c
	$(CC) -O -nostdinc -I. -c bootmain.c
	$(CC) -nostdinc -I. -c bootasm.S
	$(LD) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o
	$(OBJDUMP) -S bootblock.o > bootblock.asm
	$(OBJCOPY) -S -O binary bootblock.o bootblock
	./sign.pl bootblock

kernel : $(OBJS)
	$(LD) -Ttext 0x100000 -e main -o kernel $(OBJS)
	$(OBJDUMP) -S kernel > kernel.asm

%.o: %.c
	$(CC) -nostdinc -I. -O -c -o $@ $<

clean :
	rm -f bootmain.o bootasm.o bootblock.o bootblock
	rm -f kernel main.o kernel.asm xv6.img

67
Notes Normal file

@ -0,0 +1,67 @@
bootmain.c doesn't work right if the ELF sections aren't
sector-aligned. so you can't use ld -N. and the sections may also need
to be non-zero length, only really matters for tiny "kernels".
kernel loaded at 1 megabyte. stack same place that bootasm.S left it.
kinit() should find real mem size
and rescue usable memory below 1 meg
no paging, no use of page table hardware, just segments
no user area: no magic kernel stack mapping
so no copying of kernel stack during fork
though there is a kernel stack page for each process
no kernel malloc(), just kalloc() for user core
user pointers aren't valid in the kernel
setting up first process
we do want a process zero, as template
but not runnable
just set up return-from-trap frame on new kernel stack
fake user program that calls exec
map text read-only?
shared text?
what's on the stack during a trap or sys call?
PUSHA before scheduler switch? for callee-saved registers.
segment contents?
what does iret need to get out of the kernel?
how does INT know what kernel stack to use?
are interrupts turned on in the kernel? probably.
per-cpu curproc
one tss per process, or one per cpu?
one segment array per cpu, or per process?
pass curproc explicitly, or implicit from cpu #?
e.g. argument to newproc()?
test stack expansion
test running out of memory, process slots
we can't really use a separate stack segment, since stack addresses
need to work correctly as ordinary pointers. the same may be true of
data vs text. how can we have a gap between data and stack, so that
both can grow, without committing 4GB of physical memory? does this
mean we need paging?
what's the simplest way to add the paging we need?
one page table, re-write it each time we leave the kernel?
page table per process?
probably need to use 0-0xffffffff segments, so that
both data and stack pointers always work
so is it now worth it to make a process's phys mem contiguous?
or could use segment limits and 4 meg pages?
but limits would prevent using stack pointers as data pointers
how to write-protect text? not important?
perhaps have fixed-size stack, put it in the data segment?
oops, if kernel stack is in contiguous user phys mem, then moving
users' memory (e.g. to expand it) will wreck any pointers into the
kernel stack.
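
As a concrete illustration of the first note above: readseg() in bootmain.c (below)
rounds the load address down to a sector boundary and truncates the file offset to a
sector number independently, so a segment only lands at the right place if its file
offset and load address agree modulo SECTSIZE. A hypothetical helper, not part of this
commit, that cmain() could use to refuse anything else:

/*
 * a minimal sketch, assuming struct Proghdr from elf.h and SECTSIZE
 * from bootmain.c; segs_loadable() is a hypothetical name.
 */
static int
segs_loadable(struct Proghdr *ph, struct Proghdr *eph)
{
  for (; ph < eph; ph++)
    if ((ph->p_offset % SECTSIZE) != (ph->p_va % SECTSIZE))
      return 0;   // readseg() would copy this segment to the wrong address
  return 1;
}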

109
bootasm.S Normal file

@ -0,0 +1,109 @@
#define SEG_NULL \
.word 0, 0; \
.byte 0, 0, 0, 0
#define SEG(type,base,lim) \
.word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \
.byte (((base) >> 16) & 0xff), (0x90 | (type)), \
(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
#define STA_X 0x8 // Executable segment
#define STA_E 0x4 // Expand down (non-executable segments)
#define STA_C 0x4 // Conforming code segment (executable only)
#define STA_W 0x2 // Writeable (non-executable segments)
#define STA_R 0x2 // Readable (executable segments)
#define STA_A 0x1 // Accessed
.set PROT_MODE_CSEG,0x8 # code segment selector
.set PROT_MODE_DSEG,0x10 # data segment selector
.set CR0_PE_ON,0x1 # protected mode enable flag
###################################################################################
# ENTRY POINT
# This code should be stored in the first sector of the hard disk.
# After the BIOS initializes the hardware on startup or system reset,
# it loads this code at physical addresses 0x7c00 - 0x7dff (512 bytes).
# Then the BIOS jumps to the beginning of it, address 0x7c00,
# while running in 16-bit real-mode (8086 compatibility mode).
# The Code Segment register (CS) is initially zero on entry.
#
# This code switches into 32-bit protected mode so that all of
# memory can be accessed, then calls into C.
###################################################################################
.globl start # Entry point
start: .code16 # This runs in real mode
cli # Disable interrupts
cld # String operations increment
# Set up the important data segment registers (DS, ES, SS).
xorw %ax,%ax # Segment number zero
movw %ax,%ds # -> Data Segment
movw %ax,%es # -> Extra Segment
movw %ax,%ss # -> Stack Segment
# Set up the stack pointer, growing downward from 0x7c00.
movw $start,%sp # Stack Pointer
#### Enable A20:
#### For fascinating historical reasons (related to the fact that
#### the earliest 8086-based PCs could only address 1MB of physical memory
#### and subsequent 80286-based PCs wanted to retain maximum compatibility),
#### physical address line 20 is tied low when the machine boots.
#### Obviously this is a bit of a drag for us, especially when trying to
#### address memory above 1MB. This code undoes this.
seta20.1: inb $0x64,%al # Get status
testb $0x2,%al # Busy?
jnz seta20.1 # Yes
movb $0xd1,%al # Command: Write
outb %al,$0x64 # output port
seta20.2: inb $0x64,%al # Get status
testb $0x2,%al # Busy?
jnz seta20.2 # Yes
movb $0xdf,%al # Enable
outb %al,$0x60 # A20
#### Switch from real to protected mode
#### The descriptors in our GDT allow all physical memory to be accessed.
#### Furthermore, the descriptors have base addresses of 0, so that the
#### segment translation is a NOP, i.e. virtual addresses are identical to
#### their physical addresses. With this setup, immediately after
#### enabling protected mode it will still appear to this code
#### that it is running directly on physical memory with no translation.
#### This initial NOP-translation setup is required by the processor
#### to ensure that the transition to protected mode occurs smoothly.
real_to_prot: cli # Mandatory since we don't set up an IDT
lgdt gdtdesc # load GDT -- mandatory in protected mode
movl %cr0, %eax # turn on protected mode
orl $CR0_PE_ON, %eax #
movl %eax, %cr0 #
### CPU magic: jump to relocation, flush prefetch queue, and reload %cs
### Has the effect of just jmp to the next instruction, but simultaneously
### loads CS with $PROT_MODE_CSEG.
ljmp $PROT_MODE_CSEG, $protcseg
#### we are in 32-bit protected mode (hence the .code32)
.code32
protcseg:
# Set up the protected-mode data segment registers
movw $PROT_MODE_DSEG, %ax # Our data segment selector
movw %ax, %ds # -> DS: Data Segment
movw %ax, %es # -> ES: Extra Segment
movw %ax, %fs # -> FS
movw %ax, %gs # -> GS
movw %ax, %ss # -> SS: Stack Segment
call cmain # finish the boot load from C.
# cmain() should not return
spin: jmp spin # ..but in case it does, spin
.p2align 2 # force 4 byte alignment
gdt:
SEG_NULL # null seg
SEG(STA_X|STA_R, 0x0, 0xffffffff) # code seg
SEG(STA_W, 0x0, 0xffffffff) # data seg
gdtdesc:
.word 0x17 # sizeof(gdt) - 1
.long gdt # address gdt
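
For reference, a small host-side sanity sketch (assumed, not part of this commit) of what
the SEG(STA_X|STA_R, 0x0, 0xffffffff) line above expands to: the descriptor bytes
ff ff 00 00 00 9a cf 00, i.e. base 0 and a 20-bit limit of 0xfffff with 4K granularity,
so the code segment covers the full 4GB. It also checks that gdtdesc's limit 0x17 is just
sizeof(gdt) - 1 for three 8-byte entries.

#include <assert.h>

int
main(void)
{
  unsigned lim = 0xffffffff, base = 0x0, type = 0x8 | 0x2;   /* STA_X|STA_R */
  unsigned char d[8];

  d[0] = (lim >> 12) & 0xff;           /* limit 15:0, low byte */
  d[1] = ((lim >> 12) >> 8) & 0xff;    /* limit 15:0, high byte */
  d[2] = base & 0xff;                  /* base 15:0, low byte */
  d[3] = (base >> 8) & 0xff;           /* base 15:0, high byte */
  d[4] = (base >> 16) & 0xff;          /* base 23:16 */
  d[5] = 0x90 | type;                  /* present, DPL 0, code/data, type */
  d[6] = 0xC0 | ((lim >> 28) & 0xf);   /* 4K granularity, 32-bit, limit 19:16 */
  d[7] = (base >> 24) & 0xff;          /* base 31:24 */

  assert(d[0] == 0xff && d[1] == 0xff && d[5] == 0x9a && d[6] == 0xcf);
  assert(3 * 8 - 1 == 0x17);           /* the gdtdesc .word above */
  return 0;
}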

121
bootmain.c Normal file

@ -0,0 +1,121 @@
#include <types.h>
#include <elf.h>
#include <x86.h>
/**********************************************************************
* This is a dirt-simple boot loader, whose sole job is to boot
* an elf kernel image from the first IDE hard disk.
*
* DISK LAYOUT
* * This program (bootasm.S and bootmain.c) is the bootloader. It should
* be stored in the first sector of the disk.
*
* * The 2nd sector onward holds the kernel image.
*
* * The kernel image must be in ELF format.
*
* BOOT UP STEPS
* * when the CPU boots it loads the BIOS into memory and executes it
*
* * the BIOS initializes devices, sets up the interrupt routines, and
* reads the first sector of the boot device (e.g., hard drive)
* into memory and jumps to it.
*
* * Assuming this boot loader is stored in the first sector of the
* hard-drive, this code takes over...
*
* * control starts in bootasm.S -- which sets up protected mode
* and a stack so C code can run, then calls cmain()
*
* * cmain() in this file takes over, reads in the kernel and jumps to it.
**********************************************************************/
#define SECTSIZE 512
#define ELFHDR ((struct Elf *) 0x10000) // scratch space
void readsect(void*, uint32_t);
void readseg(uint32_t, uint32_t, uint32_t);
void
cmain(void)
{
struct Proghdr *ph, *eph;
// read 1st page off disk
readseg((uint32_t) ELFHDR, SECTSIZE*8, 0);
// is this a valid ELF?
if (ELFHDR->e_magic != ELF_MAGIC)
goto bad;
// load each program segment (ignores ph flags)
ph = (struct Proghdr *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff);
eph = ph + ELFHDR->e_phnum;
for (; ph < eph; ph++)
readseg(ph->p_va, ph->p_memsz, ph->p_offset);
// call the entry point from the ELF header
// note: does not return!
((void (*)(void)) (ELFHDR->e_entry & 0xFFFFFF))();
bad:
outw(0x8A00, 0x8A00);
outw(0x8A00, 0x8E00);
while (1)
/* do nothing */;
}
// Read 'count' bytes at 'offset' from kernel into virtual address 'va'.
// Might copy more than asked
void
readseg(uint32_t va, uint32_t count, uint32_t offset)
{
uint32_t end_va;
va &= 0xFFFFFF;
end_va = va + count;
// round down to sector boundary
va &= ~(SECTSIZE - 1);
// translate from bytes to sectors, and kernel starts at sector 1
offset = (offset / SECTSIZE) + 1;
// If this is too slow, we could read lots of sectors at a time.
// We'd write more to memory than asked, but it doesn't matter --
// we load in increasing order.
while (va < end_va) {
readsect((uint8_t*) va, offset);
va += SECTSIZE;
offset++;
}
}
void
waitdisk(void)
{
// wait for disk to be ready
while ((inb(0x1F7) & 0xC0) != 0x40)
/* do nothing */;
}
void
readsect(void *dst, uint32_t offset)
{
// wait for disk to be ready
waitdisk();
outb(0x1F2, 1); // count = 1
outb(0x1F3, offset);
outb(0x1F4, offset >> 8);
outb(0x1F5, offset >> 16);
outb(0x1F6, (offset >> 24) | 0xE0);
outb(0x1F7, 0x20); // cmd 0x20 - read sectors
// wait for disk to be ready
waitdisk();
// read a sector
insl(0x1F0, dst, SECTSIZE/4);
}
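
A small aside (a host-side check, not part of this commit): ELF_MAGIC from elf.h is just
the first four bytes of an ELF file, 0x7f 'E' 'L' 'F', read as a little-endian 32-bit
word, which is why cmain() can compare e_magic against it directly.

#include <assert.h>

int
main(void)
{
  unsigned char ident[4] = { 0x7f, 'E', 'L', 'F' };   /* start of any ELF file */
  unsigned magic = ident[0] | (ident[1] << 8) | (ident[2] << 16) | (ident[3] << 24);

  assert(magic == 0x464C457FU);   /* ELF_MAGIC in elf.h */
  return 0;
}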

108
console.c Normal file

@ -0,0 +1,108 @@
#include <types.h>
#include <x86.h>
#include "defs.h"
void
cons_putc(int c)
{
int crtport = 0x3d4; // io port of CGA
unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory
int ind;
// cursor position, 16 bits, col + 80*row
outb(crtport, 14);
ind = inb(crtport + 1) << 8;
outb(crtport, 15);
ind |= inb(crtport + 1);
c &= 0xff;
if(c == '\n'){
ind -= (ind % 80);
ind += 80;
} else {
c |= 0x0700; // light grey on black
crt[ind] = c;
ind += 1;
}
if((ind / 80) >= 24){
// scroll up
memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80));
ind -= 80;
memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind));
}
outb(crtport, 14);
outb(crtport + 1, ind >> 8);
outb(crtport, 15);
outb(crtport + 1, ind);
}
void
printint(int xx, int base, int sgn)
{
char buf[16];
char digits[] = "0123456789ABCDEF";
int i = 0, neg = 0;
unsigned int x;
if(sgn && xx < 0){
neg = 1;
x = 0 - xx;
} else {
x = xx;
}
do {
buf[i++] = digits[x % base];
} while((x /= base) != 0);
if(neg)
buf[i++] = '-';
while(i > 0){
i -= 1;
cons_putc(buf[i]);
}
}
/*
* print to the console. only understands %d and %x.
*/
void
cprintf(char *fmt, ...)
{
int i, state = 0, c;
unsigned int *ap = (unsigned int *) &fmt + 1;
for(i = 0; fmt[i]; i++){
c = fmt[i] & 0xff;
if(state == 0){
if(c == '%'){
state = '%';
} else {
cons_putc(c);
}
} else if(state == '%'){
if(c == 'd'){
printint(*ap, 10, 1);
ap++;
} else if(c == 'x'){
printint(*ap, 16, 0);
ap++;
} else if(c == '%'){
cons_putc(c);
}
state = 0;
}
}
}
void
panic(char *s)
{
cprintf(s, 0);
cprintf("\n", 0);
while(1)
;
}
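
cprintf() above finds its arguments by taking &fmt + 1, which relies on the cdecl
convention of pushing arguments right to left onto the stack. Here is a sketch of the
same loop written with <stdarg.h> instead, assuming the compiler's freestanding
stdarg.h is acceptable in this build; it reuses cons_putc() and printint() from above,
and cprintf_stdarg is a hypothetical name, not part of this commit.

#include <stdarg.h>

void
cprintf_stdarg(char *fmt, ...)
{
  int i, c, state = 0;
  va_list ap;

  va_start(ap, fmt);
  for(i = 0; fmt[i]; i++){
    c = fmt[i] & 0xff;
    if(state == 0){
      if(c == '%')
        state = '%';
      else
        cons_putc(c);
    } else if(state == '%'){
      if(c == 'd')
        printint(va_arg(ap, int), 10, 1);   // signed decimal
      else if(c == 'x')
        printint(va_arg(ap, int), 16, 0);   // unsigned hex
      else if(c == '%')
        cons_putc(c);
      state = 0;
    }
  }
  va_end(ap);
}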

12
defs.h Normal file

@ -0,0 +1,12 @@
// kalloc.c
char *kalloc(int n);
void kfree(char *cp, int len);
// console.c
void cprintf(char *fmt, ...);
void panic(char *s);
// proc.c
struct proc;
void setupsegs(struct proc *p);
struct proc * newproc(struct proc *op);

43
elf.h Normal file

@ -0,0 +1,43 @@
#ifndef JOS_INC_ELF_H
#define JOS_INC_ELF_H
#define ELF_MAGIC 0x464C457FU /* "\x7FELF" in little endian */
struct Elf {
uint32_t e_magic; // must equal ELF_MAGIC
uint8_t e_elf[12];
uint16_t e_type;
uint16_t e_machine;
uint32_t e_version;
uint32_t e_entry;
uint32_t e_phoff;
uint32_t e_shoff;
uint32_t e_flags;
uint16_t e_ehsize;
uint16_t e_phentsize;
uint16_t e_phnum;
uint16_t e_shentsize;
uint16_t e_shnum;
uint16_t e_shstrndx;
};
struct Proghdr {
uint32_t p_type;
uint32_t p_offset;
uint32_t p_va;
uint32_t p_pa;
uint32_t p_filesz;
uint32_t p_memsz;
uint32_t p_flags;
uint32_t p_align;
};
// Values for Proghdr::p_type
#define ELF_PROG_LOAD 1
// Flag bits for Proghdr::p_flags
#define ELF_PROG_FLAG_EXEC 1
#define ELF_PROG_FLAG_WRITE 2
#define ELF_PROG_FLAG_READ 4
#endif /* !JOS_INC_ELF_H */

158
kalloc.c Normal file

@ -0,0 +1,158 @@
/*
* physical memory allocator, intended to be used to allocate
* memory for user processes. allocates in 4096-byte "pages".
* free list is sorted and combines adjacent pages into
* long runs, to make it easier to allocate big segments.
* one reason the page size is 4k is that the x86 segment size
* granularity is 4k.
*/
#include "param.h"
#include "types.h"
#include "defs.h"
struct run {
struct run *next;
int len; // bytes
};
struct run *freelist;
void ktest();
/*
* initialize free list of physical pages. this code
* cheats by just considering the one megabyte of pages
* after _end.
*/
void
kinit()
{
extern int end;
unsigned mem;
char *start;
start = (char *) &end;
start = (char *) (((unsigned)start + PAGE) & ~(PAGE-1));
mem = 256; // XXX
cprintf("mem = %d\n", mem * PAGE);
kfree(start, mem * PAGE);
ktest();
}
void
kfree(char *cp, int len)
{
struct run **rr;
struct run *p = (struct run *) cp;
struct run *pend = (struct run *) (cp + len);
if(len % PAGE)
panic("kfree");
rr = &freelist;
while(*rr){
struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len);
if(p >= *rr && p < rend)
panic("freeing free page");
if(pend == *rr){
p->len = len + (*rr)->len;
p->next = (*rr)->next;
*rr = p;
return;
}
if(pend < *rr){
p->len = len;
p->next = *rr;
*rr = p;
return;
}
if(p == rend){
(*rr)->len += len;
if((*rr)->next && (*rr)->next == pend){
(*rr)->len += (*rr)->next->len;
(*rr)->next = (*rr)->next->next;
}
return;
}
rr = &((*rr)->next);
}
p->len = len;
p->next = 0;
*rr = p;
}
/*
* allocate n bytes of physical memory.
* returns a kernel-segment pointer.
* returns 0 if there's no run that's big enough.
*/
char *
kalloc(int n)
{
struct run **rr;
if(n % PAGE)
panic("kalloc");
rr = &freelist;
while(*rr){
struct run *r = *rr;
if(r->len == n){
*rr = r->next;
return (char *) r;
}
if(r->len > n){
char *p = (char *)r + (r->len - n);
r->len -= n;
return p;
}
rr = &(*rr)->next;
}
return 0;
}
void
ktest()
{
char *p1, *p2, *p3;
// test coalescing
p1 = kalloc(4 * PAGE);
kfree(p1 + 3*PAGE, PAGE);
kfree(p1 + 2*PAGE, PAGE);
kfree(p1, PAGE);
kfree(p1 + PAGE, PAGE);
p2 = kalloc(4 * PAGE);
if(p2 != p1)
panic("ktest");
kfree(p2, 4 * PAGE);
// test finding first run that fits
p1 = kalloc(1 * PAGE);
p2 = kalloc(1 * PAGE);
kfree(p1, PAGE);
p3 = kalloc(2 * PAGE);
kfree(p2, PAGE);
kfree(p3, 2 * PAGE);
// test running out of memory
p1 = 0;
while(1){
p2 = kalloc(PAGE);
if(p2 == 0)
break;
*(char **)p2 = p1;
p1 = p2;
}
while(p1){
p2 = *(char **)p1;
kfree(p1, PAGE);
p1 = p2;
}
p1 = kalloc(PAGE * 20);
if(p1 == 0)
panic("ktest2");
kfree(p1, PAGE * 20);
cprintf("ktest ok\n");
}

40
main.c Normal file

@ -0,0 +1,40 @@
#include "types.h"
#include "param.h"
#include "mmu.h"
#include "proc.h"
#include "defs.h"
#include "x86.h"
char junk1[20000];
char junk2[20000] = { 1 };
main()
{
struct proc *p;
cprintf("\nxV6\n\n");
// initialize physical memory allocator
kinit();
// create fake process zero
p = &proc[0];
p->state = WAITING;
p->sz = PAGE;
p->mem = kalloc(p->sz);
memset(p->mem, 0, p->sz);
p->kstack = kalloc(KSTACKSIZE);
p->tf = (struct Trapframe *) (p->kstack + KSTACKSIZE - sizeof(struct Trapframe));
memset(p->tf, 0, sizeof(struct Trapframe));
p->tf->tf_es = p->tf->tf_ds = p->tf->tf_ss = (SEG_UDATA << 3) | 3;
p->tf->tf_cs = (SEG_UCODE << 3) | 3;
p->tf->tf_eflags = FL_IF;
setupsegs(p);
p = newproc(&proc[0]);
// xxx copy instructions to p->mem
p->tf->tf_eip = 0;
p->tf->tf_esp = p->sz;
swtch(&proc[0]);
}

308
mmu.h Normal file

@ -0,0 +1,308 @@
/*
* This file contains definitions for the x86 memory management unit (MMU),
* including paging- and segmentation-related data structures and constants,
* the %cr0, %cr4, and %eflags registers, and traps.
*/
/*
*
* Part 1. Paging data structures and constants.
*
*/
// A linear address 'la' has a three-part structure as follows:
//
// +--------10------+-------10-------+---------12----------+
// | Page Directory | Page Table | Offset within Page |
// | Index | Index | |
// +----------------+----------------+---------------------+
// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
// \----------- PPN(la) -----------/
//
// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
// use PGADDR(PDX(la), PTX(la), PGOFF(la)).
// page number field of address
#define PPN(la) (((uintptr_t) (la)) >> PTXSHIFT)
#define VPN(la) PPN(la) // used to index into vpt[]
// page directory index
#define PDX(la) ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
#define VPD(la) PDX(la) // used to index into vpd[]
// page table index
#define PTX(la) ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)
// offset in page
#define PGOFF(la) (((uintptr_t) (la)) & 0xFFF)
// construct linear address from indexes and offset
#define PGADDR(d, t, o) ((void*) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
// Page directory and page table constants.
#define NPDENTRIES 1024 // page directory entries per page directory
#define NPTENTRIES 1024 // page table entries per page table
#define PGSIZE 4096 // bytes mapped by a page
#define PGSHIFT 12 // log2(PGSIZE)
#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
#define PTSHIFT 22 // log2(PTSIZE)
#define PTXSHIFT 12 // offset of PTX in a linear address
#define PDXSHIFT 22 // offset of PDX in a linear address
// Page table/directory entry flags.
#define PTE_P 0x001 // Present
#define PTE_W 0x002 // Writeable
#define PTE_U 0x004 // User
#define PTE_PWT 0x008 // Write-Through
#define PTE_PCD 0x010 // Cache-Disable
#define PTE_A 0x020 // Accessed
#define PTE_D 0x040 // Dirty
#define PTE_PS 0x080 // Page Size
#define PTE_MBZ 0x180 // Bits must be zero
// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
// hardware, so user processes are allowed to set them arbitrarily.
#define PTE_AVAIL 0xE00 // Available for software use
// Only flags in PTE_USER may be used in system calls.
#define PTE_USER (PTE_AVAIL | PTE_P | PTE_W | PTE_U)
// address in page table entry
#define PTE_ADDR(pte) ((physaddr_t) (pte) & ~0xFFF)
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_MP 0x00000002 // Monitor coProcessor
#define CR0_EM 0x00000004 // Emulation
#define CR0_TS 0x00000008 // Task Switched
#define CR0_ET 0x00000010 // Extension Type
#define CR0_NE 0x00000020 // Numeric Error
#define CR0_WP 0x00010000 // Write Protect
#define CR0_AM 0x00040000 // Alignment Mask
#define CR0_NW 0x20000000 // Not Writethrough
#define CR0_CD 0x40000000 // Cache Disable
#define CR0_PG 0x80000000 // Paging
#define CR4_PCE 0x00000100 // Performance counter enable
#define CR4_MCE 0x00000040 // Machine Check Enable
#define CR4_PSE 0x00000010 // Page Size Extensions
#define CR4_DE 0x00000008 // Debugging Extensions
#define CR4_TSD 0x00000004 // Time Stamp Disable
#define CR4_PVI 0x00000002 // Protected-Mode Virtual Interrupts
#define CR4_VME 0x00000001 // V86 Mode Extensions
// Eflags register
#define FL_CF 0x00000001 // Carry Flag
#define FL_PF 0x00000004 // Parity Flag
#define FL_AF 0x00000010 // Auxiliary carry Flag
#define FL_ZF 0x00000040 // Zero Flag
#define FL_SF 0x00000080 // Sign Flag
#define FL_TF 0x00000100 // Trap Flag
#define FL_IF 0x00000200 // Interrupt Flag
#define FL_DF 0x00000400 // Direction Flag
#define FL_OF 0x00000800 // Overflow Flag
#define FL_IOPL_MASK 0x00003000 // I/O Privilege Level bitmask
#define FL_IOPL_0 0x00000000 // IOPL == 0
#define FL_IOPL_1 0x00001000 // IOPL == 1
#define FL_IOPL_2 0x00002000 // IOPL == 2
#define FL_IOPL_3 0x00003000 // IOPL == 3
#define FL_NT 0x00004000 // Nested Task
#define FL_RF 0x00010000 // Resume Flag
#define FL_VM 0x00020000 // Virtual 8086 mode
#define FL_AC 0x00040000 // Alignment Check
#define FL_VIF 0x00080000 // Virtual Interrupt Flag
#define FL_VIP 0x00100000 // Virtual Interrupt Pending
#define FL_ID 0x00200000 // ID flag
// Page fault error codes
#define FEC_PR 0x1 // Page fault caused by protection violation
#define FEC_WR 0x2 // Page fault caused by a write
#define FEC_U 0x4 // Page fault occurred while in user mode
/*
*
* Part 2. Segmentation data structures and constants.
*
*/
#ifdef __ASSEMBLER__
/*
* Macros to build GDT entries in assembly.
*/
#define SEG_NULL \
.word 0, 0; \
.byte 0, 0, 0, 0
#define SEG(type,base,lim) \
.word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \
.byte (((base) >> 16) & 0xff), (0x90 | (type)), \
(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
#else // not __ASSEMBLER__
// Segment Descriptors
struct Segdesc {
unsigned sd_lim_15_0 : 16; // Low bits of segment limit
unsigned sd_base_15_0 : 16; // Low bits of segment base address
unsigned sd_base_23_16 : 8; // Middle bits of segment base address
unsigned sd_type : 4; // Segment type (see STS_ constants)
unsigned sd_s : 1; // 0 = system, 1 = application
unsigned sd_dpl : 2; // Descriptor Privilege Level
unsigned sd_p : 1; // Present
unsigned sd_lim_19_16 : 4; // High bits of segment limit
unsigned sd_avl : 1; // Unused (available for software use)
unsigned sd_rsv1 : 1; // Reserved
unsigned sd_db : 1; // 0 = 16-bit segment, 1 = 32-bit segment
unsigned sd_g : 1; // Granularity: limit scaled by 4K when set
unsigned sd_base_31_24 : 8; // High bits of segment base address
};
// Null segment
#define SEG_NULL (struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
// Segment that is loadable but faults when used
#define SEG_FAULT (struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 }
// Normal segment
#define SEG(type, base, lim, dpl) (struct Segdesc) \
{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \
type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1, \
(unsigned) (base) >> 24 }
#define SEG16(type, base, lim, dpl) (struct Segdesc) \
{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \
type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0, \
(unsigned) (base) >> 24 }
#endif /* !__ASSEMBLER__ */
// Application segment type bits
#define STA_X 0x8 // Executable segment
#define STA_E 0x4 // Expand down (non-executable segments)
#define STA_C 0x4 // Conforming code segment (executable only)
#define STA_W 0x2 // Writeable (non-executable segments)
#define STA_R 0x2 // Readable (executable segments)
#define STA_A 0x1 // Accessed
// System segment type bits
#define STS_T16A 0x1 // Available 16-bit TSS
#define STS_LDT 0x2 // Local Descriptor Table
#define STS_T16B 0x3 // Busy 16-bit TSS
#define STS_CG16 0x4 // 16-bit Call Gate
#define STS_TG 0x5 // Task Gate
#define STS_IG16 0x6 // 16-bit Interrupt Gate
#define STS_TG16 0x7 // 16-bit Trap Gate
#define STS_T32A 0x9 // Available 32-bit TSS
#define STS_T32B 0xB // Busy 32-bit TSS
#define STS_CG32 0xC // 32-bit Call Gate
#define STS_IG32 0xE // 32-bit Interrupt Gate
#define STS_TG32 0xF // 32-bit Trap Gate
/*
*
* Part 3. Traps.
*
*/
#ifndef __ASSEMBLER__
// Task state segment format (as described by the Pentium architecture book)
struct Taskstate {
uint32_t ts_link; // Old ts selector
uintptr_t ts_esp0; // Stack pointers and segment selectors
uint16_t ts_ss0; // after an increase in privilege level
uint16_t ts_padding1;
uintptr_t ts_esp1;
uint16_t ts_ss1;
uint16_t ts_padding2;
uintptr_t ts_esp2;
uint16_t ts_ss2;
uint16_t ts_padding3;
physaddr_t ts_cr3; // Page directory base
uintptr_t ts_eip; // Saved state from last task switch
uint32_t ts_eflags;
uint32_t ts_eax; // More saved state (registers)
uint32_t ts_ecx;
uint32_t ts_edx;
uint32_t ts_ebx;
uintptr_t ts_esp;
uintptr_t ts_ebp;
uint32_t ts_esi;
uint32_t ts_edi;
uint16_t ts_es; // Even more saved state (segment selectors)
uint16_t ts_padding4;
uint16_t ts_cs;
uint16_t ts_padding5;
uint16_t ts_ss;
uint16_t ts_padding6;
uint16_t ts_ds;
uint16_t ts_padding7;
uint16_t ts_fs;
uint16_t ts_padding8;
uint16_t ts_gs;
uint16_t ts_padding9;
uint16_t ts_ldt;
uint16_t ts_padding10;
uint16_t ts_t; // Trap on task switch
uint16_t ts_iomb; // I/O map base address
};
// Gate descriptors for interrupts and traps
struct Gatedesc {
unsigned gd_off_15_0 : 16; // low 16 bits of offset in segment
unsigned gd_ss : 16; // segment selector
unsigned gd_args : 5; // # args, 0 for interrupt/trap gates
unsigned gd_rsv1 : 3; // reserved (should be zero I guess)
unsigned gd_type : 4; // type (STS_{TG,IG32,TG32})
unsigned gd_s : 1; // must be 0 (system)
unsigned gd_dpl : 2; // descriptor (meaning new) privilege level
unsigned gd_p : 1; // Present
unsigned gd_off_31_16 : 16; // high bits of offset in segment
};
// Set up a normal interrupt/trap gate descriptor.
// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
// - sel: Code segment selector for interrupt/trap handler
// - off: Offset in code segment for interrupt/trap handler
// - dpl: Descriptor Privilege Level -
// the privilege level required for software to invoke
// this interrupt/trap gate explicitly using an int instruction.
#define SETGATE(gate, istrap, sel, off, dpl) \
{ \
(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \
(gate).gd_ss = (sel); \
(gate).gd_args = 0; \
(gate).gd_rsv1 = 0; \
(gate).gd_type = (istrap) ? STS_TG32 : STS_IG32; \
(gate).gd_s = 0; \
(gate).gd_dpl = (dpl); \
(gate).gd_p = 1; \
(gate).gd_off_31_16 = (uint32_t) (off) >> 16; \
}
// Set up a call gate descriptor.
#define SETCALLGATE(gate, ss, off, dpl) \
{ \
(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \
(gate).gd_ss = (ss); \
(gate).gd_args = 0; \
(gate).gd_rsv1 = 0; \
(gate).gd_type = STS_CG32; \
(gate).gd_s = 0; \
(gate).gd_dpl = (dpl); \
(gate).gd_p = 1; \
(gate).gd_off_31_16 = (uint32_t) (off) >> 16; \
}
// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
struct Pseudodesc {
uint16_t pd__garbage; // LGDT supposed to be from address 4N+2
uint16_t pd_lim; // Limit
uint32_t pd_base __attribute__ ((packed)); // Base address
};
#define PD_ADDR(desc) (&(desc).pd_lim)
#endif /* !__ASSEMBLER__ */
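
A worked example (not part of this commit) of the linear-address split described at the
top of this file: for la = 0x00801234 the page directory index is 2, the page table
index is 1, and the page offset is 0x234, and PGADDR() reassembles them into the
original address.

#include <assert.h>

int
main(void)
{
  unsigned la = 0x00801234;
  unsigned pdx = (la >> 22) & 0x3FF;   /* PDX(la) */
  unsigned ptx = (la >> 12) & 0x3FF;   /* PTX(la) */
  unsigned off = la & 0xFFF;           /* PGOFF(la) */

  assert(pdx == 2 && ptx == 1 && off == 0x234);
  assert(((pdx << 22) | (ptx << 12) | off) == la);   /* PGADDR(pdx, ptx, off) */
  return 0;
}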

3
param.h Normal file

@ -0,0 +1,3 @@
#define NPROC 64
#define PAGE 4096
#define KSTACKSIZE PAGE

112
proc.c Normal file

@ -0,0 +1,112 @@
#include "types.h"
#include "mmu.h"
#include "x86.h"
#include "proc.h"
#include "param.h"
#include "defs.h"
struct proc proc[NPROC];
/*
* set up a process's task state and segment descriptors
* correctly, given its current size and address in memory.
* this should be called whenever either of them changes.
* doesn't change the cpu's current segmentation setup.
*/
void
setupsegs(struct proc *p)
{
memset(&p->ts, 0, sizeof(struct Taskstate));
p->ts.ts_ss0 = SEG_KDATA << 3;
p->ts.ts_esp0 = (unsigned)(p->kstack + KSTACKSIZE);
memset(&p->gdt, 0, sizeof(p->gdt));
p->gdt[0] = SEG_NULL;
p->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
p->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
p->gdt[SEG_TSS] = SEG16(STS_T32A, (unsigned) &p->ts, sizeof(p->ts), 0);
p->gdt[SEG_TSS].sd_s = 0;
p->gdt[SEG_UCODE] = SEG(STA_X|STA_R, (unsigned)p->mem, p->sz, 3);
p->gdt[SEG_UDATA] = SEG(STA_W, (unsigned)p->mem, p->sz, 3);
p->gdt_pd.pd__garbage = 0;
p->gdt_pd.pd_lim = sizeof(p->gdt) - 1;
p->gdt_pd.pd_base = (unsigned) p->gdt;
}
extern void trapret();
/*
* internal fork(). does not copy kernel stack; instead,
* sets up the stack to return as if from a system call.
*/
struct proc *
newproc(struct proc *op)
{
struct proc *np;
unsigned *sp;
for(np = &proc[1]; np < &proc[NPROC]; np++)
if(np->state == UNUSED)
break;
if(np >= &proc[NPROC])
return 0;
np->sz = op->sz;
np->mem = kalloc(op->sz);
if(np->mem == 0)
return 0;
memcpy(np->mem, op->mem, np->sz);
np->kstack = kalloc(KSTACKSIZE);
if(np->kstack == 0){
kfree(np->mem, op->sz);
return 0;
}
np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe));
setupsegs(np);
np->state = RUNNABLE;
// set up kernel stack to return to user space
*(np->tf) = *(op->tf);
sp = (unsigned *) np->tf;
*(--sp) = (unsigned) &trapret; // for return from swtch()
*(--sp) = 0; // previous bp for leave in swtch()
np->esp = (unsigned) sp;
np->ebp = (unsigned) sp;
cprintf("esp %x ebp %x mem %x\n", np->esp, np->ebp, np->mem);
return np;
}
/*
* find a runnable process and switch to it.
*/
void
swtch(struct proc *op)
{
struct proc *np;
while(1){
for(np = op + 1; np != op; np++){
if(np == &proc[NPROC])
np = &proc[0];
if(np->state == RUNNABLE)
break;
}
if(np->state == RUNNABLE)
break;
// idle...
}
op->ebp = read_ebp();
op->esp = read_esp();
// XXX callee-saved registers?
// this happens to work, but probably isn't safe:
// it's not clear that np->ebp will evaluate
// correctly after changing the stack pointer.
asm volatile("lgdt %0" : : "g" (np->gdt_pd.pd_lim));
asm volatile("movl %0, %%esp" : : "g" (np->esp));
asm volatile("movl %0, %%ebp" : : "g" (np->ebp));
}
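
A minimal sketch (a hypothetical helper, not in this commit) of the kernel stack that
newproc() leaves behind: the Trapframe sits at the very top of the kernel stack, and
just below it are the trapret address and a fake saved %ebp, which is exactly what the
leave/ret at the end of swtch() consumes before trapret pops the Trapframe and irets to
user space.

static unsigned
forked_stack_top(char *kstack, unsigned kstacksize, unsigned tfsize,
                 unsigned trapret_addr)
{
  unsigned *sp = (unsigned *) (kstack + kstacksize - tfsize);   /* np->tf */

  *(--sp) = trapret_addr;   /* consumed by the ret at the end of swtch() */
  *(--sp) = 0;              /* fake saved %ebp for swtch()'s leave */
  return (unsigned) sp;     /* becomes both np->esp and np->ebp */
}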

34
proc.h Normal file

@ -0,0 +1,34 @@
/*
* p->mem:
* text
* original data and bss
* fixed-size stack
* expandable heap
*/
/*
* segments in proc->gdt
*/
#define SEG_KCODE 1 // kernel code
#define SEG_KDATA 2 // kernel data+stack
#define SEG_UCODE 3
#define SEG_UDATA 4
#define SEG_TSS 5 // this process's task state
#define NSEGS 6
struct proc{
char *mem; // start of process's physical memory
unsigned sz; // total size of mem, including kernel stack
char *kstack; // kernel stack, separate from mem so it doesn't move
enum { UNUSED, RUNNABLE, WAITING } state;
struct Taskstate ts; // only to give cpu address of kernel stack
struct Segdesc gdt[NSEGS];
struct Pseudodesc gdt_pd;
unsigned esp; // kernel stack pointer
unsigned ebp; // kernel frame pointer
struct Trapframe *tf; // points into kstack, used to find user regs
};
extern struct proc proc[];

19
sign.pl Executable file

@ -0,0 +1,19 @@
#!/usr/bin/perl
open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!";
$n = sysread(SIG, $buf, 1000);
if($n > 510){
print STDERR "boot block too large: $n bytes (max 510)\n";
exit 1;
}
print STDERR "boot block is $n bytes (max 510)\n";
$buf .= "\0" x (510-$n);
$buf .= "\x55\xAA";
open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!";
print SIG $buf;
close SIG;
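
A small cross-check (assumed, not part of this commit) of what sign.pl produces: a
512-byte boot sector whose last two bytes are 0x55, 0xAA, the signature the BIOS
requires before it will jump to the sector it loaded at 0x7c00.

#include <assert.h>
#include <stdio.h>

int
main(int argc, char *argv[])
{
  unsigned char sect[512];
  FILE *f = fopen(argc > 1 ? argv[1] : "bootblock", "rb");

  assert(f != NULL);
  assert(fread(sect, 1, sizeof(sect), f) == sizeof(sect));
  assert(sect[510] == 0x55 && sect[511] == 0xAA);   /* BIOS boot signature */
  fclose(f);
  return 0;
}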

22
string.c Normal file

@ -0,0 +1,22 @@
void *
memcpy(void *dst, void *src, unsigned n)
{
char *d = (char *) dst;
char *s = (char *) src;
while(n-- > 0)
*d++ = *s++;
return dst;
}
void *
memset(void *dst, int c, unsigned n)
{
char *d = (char *) dst;
while(n-- > 0)
*d++ = c;
return dst;
}

12
trapasm.S Normal file

@ -0,0 +1,12 @@
.text
.globl trapret
/*
* a forked process RETs here
* expects ESP to point to a Trapframe
*/
trapret:
popal
popl %es
popl %ds
addl $0x8, %esp /* trapno and errcode */
iret

6
types.h Normal file

@ -0,0 +1,6 @@
typedef unsigned long long uint64_t;
typedef unsigned int uint32_t;
typedef unsigned short uint16_t;
typedef unsigned char uint8_t;
typedef uint32_t uintptr_t;
typedef uint32_t physaddr_t;

301
x86.h Normal file

@ -0,0 +1,301 @@
static __inline void breakpoint(void) __attribute__((always_inline));
static __inline uint8_t inb(int port) __attribute__((always_inline));
static __inline void insb(int port, void *addr, int cnt) __attribute__((always_inline));
static __inline uint16_t inw(int port) __attribute__((always_inline));
static __inline void insw(int port, void *addr, int cnt) __attribute__((always_inline));
static __inline uint32_t inl(int port) __attribute__((always_inline));
static __inline void insl(int port, void *addr, int cnt) __attribute__((always_inline));
static __inline void outb(int port, uint8_t data) __attribute__((always_inline));
static __inline void outsb(int port, const void *addr, int cnt) __attribute__((always_inline));
static __inline void outw(int port, uint16_t data) __attribute__((always_inline));
static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline));
static __inline void outsl(int port, const void *addr, int cnt) __attribute__((always_inline));
static __inline void outl(int port, uint32_t data) __attribute__((always_inline));
static __inline void invlpg(void *addr) __attribute__((always_inline));
static __inline void lidt(void *p) __attribute__((always_inline));
static __inline void lldt(uint16_t sel) __attribute__((always_inline));
static __inline void ltr(uint16_t sel) __attribute__((always_inline));
static __inline void lcr0(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr0(void) __attribute__((always_inline));
static __inline uint32_t rcr2(void) __attribute__((always_inline));
static __inline void lcr3(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr3(void) __attribute__((always_inline));
static __inline void lcr4(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr4(void) __attribute__((always_inline));
static __inline void tlbflush(void) __attribute__((always_inline));
static __inline uint32_t read_eflags(void) __attribute__((always_inline));
static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline));
static __inline uint32_t read_ebp(void) __attribute__((always_inline));
static __inline uint32_t read_esp(void) __attribute__((always_inline));
static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp);
static __inline uint64_t read_tsc(void) __attribute__((always_inline));
static __inline void
breakpoint(void)
{
__asm __volatile("int3");
}
static __inline uint8_t
inb(int port)
{
uint8_t data;
__asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port));
return data;
}
static __inline void
insb(int port, void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\tinsb" :
"=D" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"memory", "cc");
}
static __inline uint16_t
inw(int port)
{
uint16_t data;
__asm __volatile("inw %w1,%0" : "=a" (data) : "d" (port));
return data;
}
static __inline void
insw(int port, void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\tinsw" :
"=D" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"memory", "cc");
}
static __inline uint32_t
inl(int port)
{
uint32_t data;
__asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port));
return data;
}
static __inline void
insl(int port, void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\tinsl" :
"=D" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"memory", "cc");
}
static __inline void
outb(int port, uint8_t data)
{
__asm __volatile("outb %0,%w1" : : "a" (data), "d" (port));
}
static __inline void
outsb(int port, const void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\toutsb" :
"=S" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"cc");
}
static __inline void
outw(int port, uint16_t data)
{
__asm __volatile("outw %0,%w1" : : "a" (data), "d" (port));
}
static __inline void
outsw(int port, const void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\toutsw" :
"=S" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"cc");
}
static __inline void
outsl(int port, const void *addr, int cnt)
{
__asm __volatile("cld\n\trepne\n\toutsl" :
"=S" (addr), "=c" (cnt) :
"d" (port), "0" (addr), "1" (cnt) :
"cc");
}
static __inline void
outl(int port, uint32_t data)
{
__asm __volatile("outl %0,%w1" : : "a" (data), "d" (port));
}
static __inline void
invlpg(void *addr)
{
__asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
}
static __inline void
lidt(void *p)
{
__asm __volatile("lidt (%0)" : : "r" (p));
}
static __inline void
lldt(uint16_t sel)
{
__asm __volatile("lldt %0" : : "r" (sel));
}
static __inline void
ltr(uint16_t sel)
{
__asm __volatile("ltr %0" : : "r" (sel));
}
static __inline void
lcr0(uint32_t val)
{
__asm __volatile("movl %0,%%cr0" : : "r" (val));
}
static __inline uint32_t
rcr0(void)
{
uint32_t val;
__asm __volatile("movl %%cr0,%0" : "=r" (val));
return val;
}
static __inline uint32_t
rcr2(void)
{
uint32_t val;
__asm __volatile("movl %%cr2,%0" : "=r" (val));
return val;
}
static __inline void
lcr3(uint32_t val)
{
__asm __volatile("movl %0,%%cr3" : : "r" (val));
}
static __inline uint32_t
rcr3(void)
{
uint32_t val;
__asm __volatile("movl %%cr3,%0" : "=r" (val));
return val;
}
static __inline void
lcr4(uint32_t val)
{
__asm __volatile("movl %0,%%cr4" : : "r" (val));
}
static __inline uint32_t
rcr4(void)
{
uint32_t cr4;
__asm __volatile("movl %%cr4,%0" : "=r" (cr4));
return cr4;
}
static __inline void
tlbflush(void)
{
uint32_t cr3;
__asm __volatile("movl %%cr3,%0" : "=r" (cr3));
__asm __volatile("movl %0,%%cr3" : : "r" (cr3));
}
static __inline uint32_t
read_eflags(void)
{
uint32_t eflags;
__asm __volatile("pushfl; popl %0" : "=r" (eflags));
return eflags;
}
static __inline void
write_eflags(uint32_t eflags)
{
__asm __volatile("pushl %0; popfl" : : "r" (eflags));
}
static __inline uint32_t
read_ebp(void)
{
uint32_t ebp;
__asm __volatile("movl %%ebp,%0" : "=r" (ebp));
return ebp;
}
static __inline uint32_t
read_esp(void)
{
uint32_t esp;
__asm __volatile("movl %%esp,%0" : "=r" (esp));
return esp;
}
static __inline void
cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
{
uint32_t eax, ebx, ecx, edx;
asm volatile("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (info));
if (eaxp)
*eaxp = eax;
if (ebxp)
*ebxp = ebx;
if (ecxp)
*ecxp = ecx;
if (edxp)
*edxp = edx;
}
static __inline uint64_t
read_tsc(void)
{
uint64_t tsc;
__asm __volatile("rdtsc" : "=A" (tsc));
return tsc;
}
struct PushRegs {
/* registers as pushed by pusha */
uint32_t reg_edi;
uint32_t reg_esi;
uint32_t reg_ebp;
uint32_t reg_oesp; /* Useless */
uint32_t reg_ebx;
uint32_t reg_edx;
uint32_t reg_ecx;
uint32_t reg_eax;
};
struct Trapframe {
struct PushRegs tf_regs;
uint16_t tf_es;
uint16_t tf_padding1;
uint16_t tf_ds;
uint16_t tf_padding2;
uint32_t tf_trapno;
/* below here defined by x86 hardware */
uint32_t tf_err;
uintptr_t tf_eip;
uint16_t tf_cs;
uint16_t tf_padding3;
uint32_t tf_eflags;
/* below here only when crossing rings, such as from user to kernel */
uintptr_t tf_esp;
uint16_t tf_ss;
uint16_t tf_padding4;
};