From 55e95b16db458b7f9abeca96e541acbdf8d7f85b Mon Sep 17 00:00:00 2001 From: rtm Date: Mon, 12 Jun 2006 15:22:12 +0000 Subject: [PATCH] import --- Makefile | 30 ++++++ Notes | 67 ++++++++++++ bootasm.S | 109 +++++++++++++++++++ bootmain.c | 121 +++++++++++++++++++++ console.c | 108 +++++++++++++++++++ defs.h | 12 +++ elf.h | 43 ++++++++ kalloc.c | 158 +++++++++++++++++++++++++++ main.c | 40 +++++++ mmu.h | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++ param.h | 3 + proc.c | 112 +++++++++++++++++++ proc.h | 34 ++++++ sign.pl | 19 ++++ string.c | 22 ++++ trapasm.S | 12 +++ types.h | 6 ++ x86.h | 301 +++++++++++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 1505 insertions(+) create mode 100644 Makefile create mode 100644 Notes create mode 100644 bootasm.S create mode 100644 bootmain.c create mode 100644 console.c create mode 100644 defs.h create mode 100644 elf.h create mode 100644 kalloc.c create mode 100644 main.c create mode 100644 mmu.h create mode 100644 param.h create mode 100644 proc.c create mode 100644 proc.h create mode 100755 sign.pl create mode 100644 string.c create mode 100644 trapasm.S create mode 100644 types.h create mode 100644 x86.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e63c77c --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +OBJS = main.o console.o string.o kalloc.o proc.o trapasm.o + +CC = i386-jos-elf-gcc +LD = i386-jos-elf-ld +OBJCOPY = i386-jos-elf-objcopy +OBJDUMP = i386-jos-elf-objdump + +xv6.img : bootblock kernel + dd if=/dev/zero of=xv6.img count=10000 + dd if=bootblock of=xv6.img conv=notrunc + dd if=kernel of=xv6.img seek=1 conv=notrunc + +bootblock : bootasm.S bootmain.c + $(CC) -O -nostdinc -I. -c bootmain.c + $(CC) -nostdinc -I. 
-c bootasm.S + $(LD) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o + $(OBJDUMP) -S bootblock.o > bootblock.asm + $(OBJCOPY) -S -O binary bootblock.o bootblock + ./sign.pl bootblock + +kernel : $(OBJS) + $(LD) -Ttext 0x100000 -e main -o kernel $(OBJS) + $(OBJDUMP) -S kernel > kernel.asm + +%.o: %.c + $(CC) -nostdinc -I. -O -c -o $@ $< + +clean : + rm -f bootmain.o bootasm.o bootblock.o bootblock bootblock.asm + rm -f kernel $(OBJS) kernel.asm xv6.img diff --git a/Notes b/Notes new file mode 100644 index 0000000..e5e2c5f --- /dev/null +++ b/Notes @@ -0,0 +1,67 @@ +bootmain.c doesn't work right if the ELF sections aren't +sector-aligned. so you can't use ld -N. and the sections may also need +to be non-zero length, only really matters for tiny "kernels". + +kernel loaded at 1 megabyte. stack same place that bootasm.S left it. + +kinit() should find real mem size + and rescue useable memory below 1 meg + +no paging, no use of page table hardware, just segments + +no user area: no magic kernel stack mapping + so no copying of kernel stack during fork + though there is a kernel stack page for each process + +no kernel malloc(), just kalloc() for user core + +user pointers aren't valid in the kernel + +setting up first process + we do want a process zero, as template + but not runnable + just set up return-from-trap frame on new kernel stack + fake user program that calls exec + +map text read-only? +shared text? + +what's on the stack during a trap or sys call? + PUSHA before scheduler switch? for callee-saved registers. + segment contents? + what does iret need to get out of the kernel? + how does INT know what kernel stack to use? + +are interrupts turned on in the kernel? probably. + +per-cpu curproc +one tss per process, or one per cpu? +one segment array per cpu, or per process? + +pass curproc explicitly, or implicit from cpu #? + e.g. argument to newproc()? 
+ +test stack expansion +test running out of memory, process slots + +we can't really use a separate stack segment, since stack addresses +need to work correctly as ordinary pointers. the same may be true of +data vs text. how can we have a gap between data and stack, so that +both can grow, without committing 4GB of physical memory? does this +mean we need paging? + +what's the simplest way to add the paging we need? + one page table, re-write it each time we leave the kernel? + page table per process? + probably need to use 0-0xffffffff segments, so that + both data and stack pointers always work + so is it now worth it to make a process's phys mem contiguous? + or could use segment limits and 4 meg pages? + but limits would prevent using stack pointers as data pointers + how to write-protect text? not important? + +perhaps have fixed-size stack, put it in the data segment? + +oops, if kernel stack is in contiguous user phys mem, then moving +users' memory (e.g. to expand it) will wreck any pointers into the +kernel stack. 
diff --git a/bootasm.S b/bootasm.S new file mode 100644 index 0000000..00cbdc9 --- /dev/null +++ b/bootasm.S @@ -0,0 +1,109 @@ +#define SEG_NULL \ + .word 0, 0; \ + .byte 0, 0, 0, 0 +#define SEG(type,base,lim) \ + .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#define STA_X 0x8 // Executable segment +#define STA_E 0x4 // Expand down (non-executable segments) +#define STA_C 0x4 // Conforming code segment (executable only) +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) +#define STA_A 0x1 // Accessed + +.set PROT_MODE_CSEG,0x8 # code segment selector +.set PROT_MODE_DSEG,0x10 # data segment selector +.set CR0_PE_ON,0x1 # protected mode enable flag + +################################################################################### +# ENTRY POINT +# This code should be stored in the first sector of the hard disk. +# After the BIOS initializes the hardware on startup or system reset, +# it loads this code at physical address 0x7c00 - 0x7e00 (512 bytes). +# Then the BIOS jumps to the beginning of it, address 0x7c00, +# while running in 16-bit real-mode (8086 compatibility mode). +# The Code Segment register (CS) is initially zero on entry. +# +# This code switches into 32-bit protected mode so that all of +# memory can be accessed, then calls into C. +################################################################################### + +.globl start # Entry point +start: .code16 # This runs in real mode + cli # Disable interrupts + cld # String operations increment + + # Set up the important data segment registers (DS, ES, SS). + xorw %ax,%ax # Segment number zero + movw %ax,%ds # -> Data Segment + movw %ax,%es # -> Extra Segment + movw %ax,%ss # -> Stack Segment + + # Set up the stack pointer, growing downward from 0x7c00. 
+ movw $start,%sp # Stack Pointer + +#### Enable A20: +#### For fascinating historical reasons (related to the fact that +#### the earliest 8086-based PCs could only address 1MB of physical memory +#### and subsequent 80286-based PCs wanted to retain maximum compatibility), +#### physical address line 20 is tied to low when the machine boots. +#### Obviously this is a bit of a drag for us, especially when trying to +#### address memory above 1MB. This code undoes this. + +seta20.1: inb $0x64,%al # Get status + testb $0x2,%al # Busy? + jnz seta20.1 # Yes + movb $0xd1,%al # Command: Write + outb %al,$0x64 # output port +seta20.2: inb $0x64,%al # Get status + testb $0x2,%al # Busy? + jnz seta20.2 # Yes + movb $0xdf,%al # Enable + outb %al,$0x60 # A20 + +#### Switch from real to protected mode +#### The descriptors in our GDT allow all physical memory to be accessed. +#### Furthermore, the descriptors have base addresses of 0, so that the +#### segment translation is a NOP, i.e. virtual addresses are identical to +#### their physical addresses. With this setup, immediately after +#### enabling protected mode it will still appear to this code +#### that it is running directly on physical memory with no translation. +#### This initial NOP-translation setup is required by the processor +#### to ensure that the transition to protected mode occurs smoothly. + +real_to_prot: cli # Mandatory since we don't set up an IDT + lgdt gdtdesc # load GDT -- mandatory in protected mode + movl %cr0, %eax # turn on protected mode + orl $CR0_PE_ON, %eax # + movl %eax, %cr0 # + ### CPU magic: jump to relocation, flush prefetch queue, and reload %cs + ### Has the effect of just jmp to the next instruction, but simultaneously + ### loads CS with $PROT_MODE_CSEG. 
+ ljmp $PROT_MODE_CSEG, $protcseg + +#### we are in 32-bit protected mode (hence the .code32) +.code32 +protcseg: + # Set up the protected-mode data segment registers + movw $PROT_MODE_DSEG, %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + movw %ax, %ss # -> SS: Stack Segment + + call cmain # finish the boot load from C. + # cmain() should not return +spin: jmp spin # ..but in case it does, spin + +.p2align 2 # force 4 byte alignment +gdt: + SEG_NULL # null seg + SEG(STA_X|STA_R, 0x0, 0xffffffff) # code seg + SEG(STA_W, 0x0, 0xffffffff) # data seg + +gdtdesc: + .word 0x17 # sizeof(gdt) - 1 + .long gdt # address gdt diff --git a/bootmain.c b/bootmain.c new file mode 100644 index 0000000..79d769c --- /dev/null +++ b/bootmain.c @@ -0,0 +1,121 @@ +#include <types.h> +#include <elf.h> +#include <x86.h> + +/********************************************************************** + * This is a dirt simple boot loader, whose sole job is to boot + * an elf kernel image from the first IDE hard disk. + * + * DISK LAYOUT + * * This program (bootasm.S and bootmain.c) is the bootloader. It should + * be stored in the first sector of the disk. + * + * * The 2nd sector onward holds the kernel image. + * + * * The kernel image must be in ELF format. + * + * BOOT UP STEPS + * * when the CPU boots it loads the BIOS into memory and executes it + * + * * the BIOS initializes devices, sets up the interrupt routines, and + * reads the first sector of the boot device (e.g., hard-drive) + * into memory and jumps to it. + * + * * Assuming this boot loader is stored in the first sector of the + * hard-drive, this code takes over... + * + * * control starts in bootasm.S -- which sets up protected mode, + * and a stack so C code can then run, then calls cmain() + * + * * cmain() in this file takes over, reads in the kernel and jumps to it. 
+ **********************************************************************/ + +#define SECTSIZE 512 +#define ELFHDR ((struct Elf *) 0x10000) // scratch space + +void readsect(void*, uint32_t); +void readseg(uint32_t, uint32_t, uint32_t); + +void +cmain(void) +{ + struct Proghdr *ph, *eph; + + // read 1st page off disk + readseg((uint32_t) ELFHDR, SECTSIZE*8, 0); + + // is this a valid ELF? + if (ELFHDR->e_magic != ELF_MAGIC) + goto bad; + + // load each program segment (ignores ph flags) + ph = (struct Proghdr *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff); + eph = ph + ELFHDR->e_phnum; + for (; ph < eph; ph++) + readseg(ph->p_va, ph->p_memsz, ph->p_offset); + + // call the entry point from the ELF header + // note: does not return! + ((void (*)(void)) (ELFHDR->e_entry & 0xFFFFFF))(); + +bad: + outw(0x8A00, 0x8A00); + outw(0x8A00, 0x8E00); + while (1) + /* do nothing */; +} + +// Read 'count' bytes at 'offset' from kernel into virtual address 'va'. +// Might copy more than asked +void +readseg(uint32_t va, uint32_t count, uint32_t offset) +{ + uint32_t end_va; + + va &= 0xFFFFFF; + end_va = va + count; + + // round down to sector boundary + va &= ~(SECTSIZE - 1); + + // translate from bytes to sectors, and kernel starts at sector 1 + offset = (offset / SECTSIZE) + 1; + + // If this is too slow, we could read lots of sectors at a time. + // We'd write more to memory than asked, but it doesn't matter -- + // we load in increasing order. 
+ while (va < end_va) { + readsect((uint8_t*) va, offset); + va += SECTSIZE; + offset++; + } +} + +void +waitdisk(void) +{ + // wait for disk ready + while ((inb(0x1F7) & 0xC0) != 0x40) + /* do nothing */; +} + +void +readsect(void *dst, uint32_t offset) +{ + // wait for disk to be ready + waitdisk(); + + outb(0x1F2, 1); // count = 1 + outb(0x1F3, offset); + outb(0x1F4, offset >> 8); + outb(0x1F5, offset >> 16); + outb(0x1F6, (offset >> 24) | 0xE0); + outb(0x1F7, 0x20); // cmd 0x20 - read sectors + + // wait for disk to be ready + waitdisk(); + + // read a sector + insl(0x1F0, dst, SECTSIZE/4); +} + diff --git a/console.c b/console.c new file mode 100644 index 0000000..2035611 --- /dev/null +++ b/console.c @@ -0,0 +1,108 @@ +#include <types.h> +#include <x86.h> +#include "defs.h" + +void +cons_putc(int c) +{ + int crtport = 0x3d4; // io port of CGA + unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory + int ind; + + // cursor position, 16 bits, col + 80*row + outb(crtport, 14); + ind = inb(crtport + 1) << 8; + outb(crtport, 15); + ind |= inb(crtport + 1); + + c &= 0xff; + + if(c == '\n'){ + ind -= (ind % 80); + ind += 80; + } else { + c |= 0x0700; // light gray on black + crt[ind] = c; + ind += 1; + } + + if((ind / 80) >= 24){ + // scroll up + memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80)); + ind -= 80; + memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind)); + } + + outb(crtport, 14); + outb(crtport + 1, ind >> 8); + outb(crtport, 15); + outb(crtport + 1, ind); +} + +void +printint(int xx, int base, int sgn) +{ + char buf[16]; + char digits[] = "0123456789ABCDEF"; + int i = 0, neg = 0; + unsigned int x; + + if(sgn && xx < 0){ + neg = 1; + x = 0 - (unsigned int) xx; // negate in unsigned arithmetic: -INT_MIN overflows as an int + } else { + x = xx; + } + + do { + buf[i++] = digits[x % base]; + } while((x /= base) != 0); + if(neg) + buf[i++] = '-'; + + while(i > 0){ + i -= 1; + cons_putc(buf[i]); + } +} + +/* + * print to the console. only understands %d and %x. + */ +void +cprintf(char *fmt, ...) 
+{ + int i, state = 0, c; + unsigned int *ap = (unsigned int *) &fmt + 1; + + for(i = 0; fmt[i]; i++){ + c = fmt[i] & 0xff; + if(state == 0){ + if(c == '%'){ + state = '%'; + } else { + cons_putc(c); + } + } else if(state == '%'){ + if(c == 'd'){ + printint(*ap, 10, 1); + ap++; + } else if(c == 'x'){ + printint(*ap, 16, 0); + ap++; + } else if(c == '%'){ + cons_putc(c); + } + state = 0; + } + } +} + +void +panic(char *s) +{ + cprintf(s, 0); + cprintf("\n", 0); + while(1) + ; +} diff --git a/defs.h b/defs.h new file mode 100644 index 0000000..ec41bfe --- /dev/null +++ b/defs.h @@ -0,0 +1,12 @@ +// kalloc.c +char *kalloc(int n); +void kfree(char *cp, int len); + +// console.c +void cprintf(char *fmt, ...); +void panic(char *s); + +// proc.c +struct proc; +void setupsegs(struct proc *p); +struct proc * newproc(struct proc *op); diff --git a/elf.h b/elf.h new file mode 100644 index 0000000..ea9f964 --- /dev/null +++ b/elf.h @@ -0,0 +1,43 @@ +#ifndef JOS_INC_ELF_H +#define JOS_INC_ELF_H + +#define ELF_MAGIC 0x464C457FU /* "\x7FELF" in little endian */ + +struct Elf { + uint32_t e_magic; // must equal ELF_MAGIC + uint8_t e_elf[12]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +}; + +struct Proghdr { + uint32_t p_type; + uint32_t p_offset; + uint32_t p_va; + uint32_t p_pa; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +}; + +// Values for Proghdr::p_type +#define ELF_PROG_LOAD 1 + +// Flag bits for Proghdr::p_flags +#define ELF_PROG_FLAG_EXEC 1 +#define ELF_PROG_FLAG_WRITE 2 +#define ELF_PROG_FLAG_READ 4 + +#endif /* !JOS_INC_ELF_H */ diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..5ea38fd --- /dev/null +++ b/kalloc.c @@ -0,0 +1,158 @@ +/* + * physical memory allocator, intended to be used 
to allocate + * memory for user processes. allocates in 4096-byte "pages". + * free list is sorted and combines adjacent pages into + * long runs, to make it easier to allocate big segments. + * one reason the page size is 4k is that the x86 segment size + * granularity is 4k. + */ + +#include "param.h" +#include "types.h" +#include "defs.h" + +struct run { + struct run *next; + int len; // bytes +}; +struct run *freelist; + +void ktest(); + +/* + * initialize free list of physical pages. this code + * cheats by just considering the one megabyte of pages + * after _end. + */ +void +kinit() +{ + extern int end; + unsigned mem; + char *start; + + start = (char *) &end; + start = (char *) (((unsigned)start + PAGE - 1) & ~(PAGE-1)); // round up; an already-aligned end is kept as is + mem = 256; // XXX + cprintf("mem = %d\n", mem * PAGE); + kfree(start, mem * PAGE); + ktest(); +} + +void +kfree(char *cp, int len) +{ + struct run **rr; + struct run *p = (struct run *) cp; + struct run *pend = (struct run *) (cp + len); + + if(len % PAGE) + panic("kfree"); + + rr = &freelist; + while(*rr){ + struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len); + if(p >= *rr && p < rend) + panic("freeing free page"); + if(pend == *rr){ + p->len = len + (*rr)->len; + p->next = (*rr)->next; + *rr = p; + return; + } + if(pend < *rr){ + p->len = len; + p->next = *rr; + *rr = p; + return; + } + if(p == rend){ + (*rr)->len += len; + if((*rr)->next && (*rr)->next == pend){ + (*rr)->len += (*rr)->next->len; + (*rr)->next = (*rr)->next->next; + } + return; + } + rr = &((*rr)->next); + } + p->len = len; + p->next = 0; + *rr = p; +} + +/* + * allocate n bytes of physical memory. + * returns a kernel-segment pointer. + * returns 0 if there's no run that's big enough. 
+ */ +char * +kalloc(int n) +{ + struct run **rr; + + if(n % PAGE) + panic("kalloc"); + + rr = &freelist; + while(*rr){ + struct run *r = *rr; + if(r->len == n){ + *rr = r->next; + return (char *) r; + } + if(r->len > n){ + char *p = (char *)r + (r->len - n); + r->len -= n; + return p; + } + rr = &(*rr)->next; + } + return 0; +} + +void +ktest() +{ + char *p1, *p2, *p3; + + // test coalescing + p1 = kalloc(4 * PAGE); + kfree(p1 + 3*PAGE, PAGE); + kfree(p1 + 2*PAGE, PAGE); + kfree(p1, PAGE); + kfree(p1 + PAGE, PAGE); + p2 = kalloc(4 * PAGE); + if(p2 != p1) + panic("ktest"); + kfree(p2, 4 * PAGE); + + // test finding first run that fits + p1 = kalloc(1 * PAGE); + p2 = kalloc(1 * PAGE); + kfree(p1, PAGE); + p3 = kalloc(2 * PAGE); + kfree(p2, PAGE); + kfree(p3, 2 * PAGE); + + // test running out of memory + p1 = 0; + while(1){ + p2 = kalloc(PAGE); + if(p2 == 0) + break; + *(char **)p2 = p1; + p1 = p2; + } + while(p1){ + p2 = *(char **)p1; + kfree(p1, PAGE); + p1 = p2; + } + p1 = kalloc(PAGE * 20); + if(p1 == 0) + panic("ktest2"); + kfree(p1, PAGE * 20); + + cprintf("ktest ok\n"); +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..a1c08c9 --- /dev/null +++ b/main.c @@ -0,0 +1,40 @@ +#include "types.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" + +char junk1[20000]; +char junk2[20000] = { 1 }; + +main() +{ + struct proc *p; + + cprintf("\nxV6\n\n"); + + // initialize physical memory allocator + kinit(); + + // create fake process zero + p = &proc[0]; + p->state = WAITING; + p->sz = PAGE; + p->mem = kalloc(p->sz); + memset(p->mem, 0, p->sz); + p->kstack = kalloc(KSTACKSIZE); + p->tf = (struct Trapframe *) (p->kstack + KSTACKSIZE - sizeof(struct Trapframe)); + memset(p->tf, 0, sizeof(struct Trapframe)); + p->tf->tf_es = p->tf->tf_ds = p->tf->tf_ss = (SEG_UDATA << 3) | 3; + p->tf->tf_cs = (SEG_UCODE << 3) | 3; + p->tf->tf_eflags = FL_IF; + setupsegs(p); + + p = newproc(&proc[0]); + // xxx copy instructions 
to p->mem + p->tf->tf_eip = 0; + p->tf->tf_esp = p->sz; + + swtch(&proc[0]); +} diff --git a/mmu.h b/mmu.h new file mode 100644 index 0000000..776db23 --- /dev/null +++ b/mmu.h @@ -0,0 +1,308 @@ +/* + * This file contains definitions for the x86 memory management unit (MMU), + * including paging- and segmentation-related data structures and constants, + * the %cr0, %cr4, and %eflags registers, and traps. + */ + +/* + * + * Part 1. Paging data structures and constants. + * + */ + +// A linear address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory | Page Table | Offset within Page | +// | Index | Index | | +// +----------------+----------------+---------------------+ +// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ +// \----------- PPN(la) -----------/ +// +// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. +// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), +// use PGADDR(PDX(la), PTX(la), PGOFF(la)). + +// page number field of address +#define PPN(la) (((uintptr_t) (la)) >> PTXSHIFT) +#define VPN(la) PPN(la) // used to index into vpt[] + +// page directory index +#define PDX(la) ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF) +#define VPD(la) PDX(la) // used to index into vpd[] + +// page table index +#define PTX(la) ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF) + +// offset in page +#define PGOFF(la) (((uintptr_t) (la)) & 0xFFF) + +// construct linear address from indexes and offset +#define PGADDR(d, t, o) ((void*) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// Page directory and page table constants. 
+#define NPDENTRIES 1024 // page directory entries per page directory +#define NPTENTRIES 1024 // page table entries per page table + +#define PGSIZE 4096 // bytes mapped by a page +#define PGSHIFT 12 // log2(PGSIZE) + +#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry +#define PTSHIFT 22 // log2(PTSIZE) + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +// Page table/directory entry flags. +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PWT 0x008 // Write-Through +#define PTE_PCD 0x010 // Cache-Disable +#define PTE_A 0x020 // Accessed +#define PTE_D 0x040 // Dirty +#define PTE_PS 0x080 // Page Size +#define PTE_MBZ 0x180 // Bits must be zero + +// The PTE_AVAIL bits aren't used by the kernel or interpreted by the +// hardware, so user processes are allowed to set them arbitrarily. +#define PTE_AVAIL 0xE00 // Available for software use + +// Only flags in PTE_USER may be used in system calls. 
+#define PTE_USER (PTE_AVAIL | PTE_P | PTE_W | PTE_U) + +// address in page table entry +#define PTE_ADDR(pte) ((physaddr_t) (pte) & ~0xFFF) + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_MP 0x00000002 // Monitor coProcessor +#define CR0_EM 0x00000004 // Emulation +#define CR0_TS 0x00000008 // Task Switched +#define CR0_ET 0x00000010 // Extension Type +#define CR0_NE 0x00000020 // Numeric Error +#define CR0_WP 0x00010000 // Write Protect +#define CR0_AM 0x00040000 // Alignment Mask +#define CR0_NW 0x20000000 // Not Writethrough +#define CR0_CD 0x40000000 // Cache Disable +#define CR0_PG 0x80000000 // Paging + +#define CR4_PCE 0x00000100 // Performance counter enable +#define CR4_MCE 0x00000040 // Machine Check Enable +#define CR4_PSE 0x00000010 // Page Size Extensions +#define CR4_DE 0x00000008 // Debugging Extensions +#define CR4_TSD 0x00000004 // Time Stamp Disable +#define CR4_PVI 0x00000002 // Protected-Mode Virtual Interrupts +#define CR4_VME 0x00000001 // V86 Mode Extensions + +// Eflags register +#define FL_CF 0x00000001 // Carry Flag +#define FL_PF 0x00000004 // Parity Flag +#define FL_AF 0x00000010 // Auxiliary carry Flag +#define FL_ZF 0x00000040 // Zero Flag +#define FL_SF 0x00000080 // Sign Flag +#define FL_TF 0x00000100 // Trap Flag +#define FL_IF 0x00000200 // Interrupt Flag +#define FL_DF 0x00000400 // Direction Flag +#define FL_OF 0x00000800 // Overflow Flag +#define FL_IOPL_MASK 0x00003000 // I/O Privilege Level bitmask +#define FL_IOPL_0 0x00000000 // IOPL == 0 +#define FL_IOPL_1 0x00001000 // IOPL == 1 +#define FL_IOPL_2 0x00002000 // IOPL == 2 +#define FL_IOPL_3 0x00003000 // IOPL == 3 +#define FL_NT 0x00004000 // Nested Task +#define FL_RF 0x00010000 // Resume Flag +#define FL_VM 0x00020000 // Virtual 8086 mode +#define FL_AC 0x00040000 // Alignment Check +#define FL_VIF 0x00080000 // Virtual Interrupt Flag +#define FL_VIP 0x00100000 // Virtual Interrupt Pending +#define FL_ID 0x00200000 // ID flag + 
+// Page fault error codes +#define FEC_PR 0x1 // Page fault caused by protection violation +#define FEC_WR 0x2 // Page fault caused by a write +#define FEC_U 0x4 // Page fault occurred while in user mode + + +/* + * + * Part 2. Segmentation data structures and constants. + * + */ + +#ifdef __ASSEMBLER__ + +/* + * Macros to build GDT entries in assembly. + */ +#define SEG_NULL \ + .word 0, 0; \ + .byte 0, 0, 0, 0 +#define SEG(type,base,lim) \ + .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#else // not __ASSEMBLER__ + +// Segment Descriptors +struct Segdesc { + unsigned sd_lim_15_0 : 16; // Low bits of segment limit + unsigned sd_base_15_0 : 16; // Low bits of segment base address + unsigned sd_base_23_16 : 8; // Middle bits of segment base address + unsigned sd_type : 4; // Segment type (see STS_ constants) + unsigned sd_s : 1; // 0 = system, 1 = application + unsigned sd_dpl : 2; // Descriptor Privilege Level + unsigned sd_p : 1; // Present + unsigned sd_lim_19_16 : 4; // High bits of segment limit + unsigned sd_avl : 1; // Unused (available for software use) + unsigned sd_rsv1 : 1; // Reserved + unsigned sd_db : 1; // 0 = 16-bit segment, 1 = 32-bit segment + unsigned sd_g : 1; // Granularity: limit scaled by 4K when set + unsigned sd_base_31_24 : 8; // High bits of segment base address +}; +// Null segment +#define SEG_NULL (struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +// Segment that is loadable but faults when used +#define SEG_FAULT (struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 } +// Normal segment +#define SEG(type, base, lim, dpl) (struct Segdesc) \ +{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \ + type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1, \ + (unsigned) (base) >> 24 } +#define SEG16(type, base, lim, dpl) (struct Segdesc) \ +{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \ + 
type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0, \ + (unsigned) (base) >> 24 } + +#endif /* !__ASSEMBLER__ */ + +// Application segment type bits +#define STA_X 0x8 // Executable segment +#define STA_E 0x4 // Expand down (non-executable segments) +#define STA_C 0x4 // Conforming code segment (executable only) +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) +#define STA_A 0x1 // Accessed + +// System segment type bits +#define STS_T16A 0x1 // Available 16-bit TSS +#define STS_LDT 0x2 // Local Descriptor Table +#define STS_T16B 0x3 // Busy 16-bit TSS +#define STS_CG16 0x4 // 16-bit Call Gate +#define STS_TG 0x5 // Task Gate +#define STS_IG16 0x6 // 16-bit Interrupt Gate +#define STS_TG16 0x7 // 16-bit Trap Gate +#define STS_T32A 0x9 // Available 32-bit TSS +#define STS_T32B 0xB // Busy 32-bit TSS +#define STS_CG32 0xC // 32-bit Call Gate +#define STS_IG32 0xE // 32-bit Interrupt Gate +#define STS_TG32 0xF // 32-bit Trap Gate + + +/* + * + * Part 3. Traps. 
+ * + */ + +#ifndef __ASSEMBLER__ + +// Task state segment format (as described by the Pentium architecture book) +struct Taskstate { + uint32_t ts_link; // Old ts selector + uintptr_t ts_esp0; // Stack pointers and segment selectors + uint16_t ts_ss0; // after an increase in privilege level + uint16_t ts_padding1; + uintptr_t ts_esp1; + uint16_t ts_ss1; + uint16_t ts_padding2; + uintptr_t ts_esp2; + uint16_t ts_ss2; + uint16_t ts_padding3; + physaddr_t ts_cr3; // Page directory base + uintptr_t ts_eip; // Saved state from last task switch + uint32_t ts_eflags; + uint32_t ts_eax; // More saved state (registers) + uint32_t ts_ecx; + uint32_t ts_edx; + uint32_t ts_ebx; + uintptr_t ts_esp; + uintptr_t ts_ebp; + uint32_t ts_esi; + uint32_t ts_edi; + uint16_t ts_es; // Even more saved state (segment selectors) + uint16_t ts_padding4; + uint16_t ts_cs; + uint16_t ts_padding5; + uint16_t ts_ss; + uint16_t ts_padding6; + uint16_t ts_ds; + uint16_t ts_padding7; + uint16_t ts_fs; + uint16_t ts_padding8; + uint16_t ts_gs; + uint16_t ts_padding9; + uint16_t ts_ldt; + uint16_t ts_padding10; + uint16_t ts_t; // Trap on task switch + uint16_t ts_iomb; // I/O map base address +}; + +// Gate descriptors for interrupts and traps +struct Gatedesc { + unsigned gd_off_15_0 : 16; // low 16 bits of offset in segment + unsigned gd_ss : 16; // segment selector + unsigned gd_args : 5; // # args, 0 for interrupt/trap gates + unsigned gd_rsv1 : 3; // reserved(should be zero I guess) + unsigned gd_type : 4; // type(STS_{TG,IG32,TG32}) + unsigned gd_s : 1; // must be 0 (system) + unsigned gd_dpl : 2; // descriptor(meaning new) privilege level + unsigned gd_p : 1; // Present + unsigned gd_off_31_16 : 16; // high bits of offset in segment +}; + +// Set up a normal interrupt/trap gate descriptor. +// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. 
+// - sel: Code segment selector for interrupt/trap handler +// - off: Offset in code segment for interrupt/trap handler +// - dpl: Descriptor Privilege Level - +// the privilege level required for software to invoke +// this interrupt/trap gate explicitly using an int instruction. +#define SETGATE(gate, istrap, sel, off, dpl) \ +{ \ + (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ + (gate).gd_ss = (sel); \ + (gate).gd_args = 0; \ + (gate).gd_rsv1 = 0; \ + (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32; \ + (gate).gd_s = 0; \ + (gate).gd_dpl = (dpl); \ + (gate).gd_p = 1; \ + (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ +} + +// Set up a call gate descriptor. +#define SETCALLGATE(gate, ss, off, dpl) \ +{ \ + (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ + (gate).gd_ss = (ss); \ + (gate).gd_args = 0; \ + (gate).gd_rsv1 = 0; \ + (gate).gd_type = STS_CG32; \ + (gate).gd_s = 0; \ + (gate).gd_dpl = (dpl); \ + (gate).gd_p = 1; \ + (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ +} + +// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions. +struct Pseudodesc { + uint16_t pd__garbage; // LGDT supposed to be from address 4N+2 + uint16_t pd_lim; // Limit + uint32_t pd_base __attribute__ ((packed)); // Base address +}; +#define PD_ADDR(desc) (&(desc).pd_lim) + +#endif /* !__ASSEMBLER__ */ + diff --git a/param.h b/param.h new file mode 100644 index 0000000..798dc5b --- /dev/null +++ b/param.h @@ -0,0 +1,3 @@ +#define NPROC 64 +#define PAGE 4096 +#define KSTACKSIZE PAGE diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..fda834e --- /dev/null +++ b/proc.c @@ -0,0 +1,112 @@ +#include "types.h" +#include "mmu.h" +#include "x86.h" +#include "proc.h" +#include "param.h" +#include "defs.h" + +struct proc proc[NPROC]; + +/* + * set up a process's task state and segment descriptors + * correctly, given its current size and address in memory. + * this should be called whenever the latter change. + * doesn't change the cpu's current segmentation setup. 
+ */ +void +setupsegs(struct proc *p) +{ + memset(&p->ts, 0, sizeof(struct Taskstate)); + p->ts.ts_ss0 = SEG_KDATA << 3; + p->ts.ts_esp0 = (unsigned)(p->kstack + KSTACKSIZE); + + memset(&p->gdt, 0, sizeof(p->gdt)); + p->gdt[0] = SEG_NULL; + p->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); + p->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + p->gdt[SEG_TSS] = SEG16(STS_T32A, (unsigned) &p->ts, sizeof(p->ts) - 1, 0); // a limit is the last valid offset, so size - 1 + p->gdt[SEG_TSS].sd_s = 0; + p->gdt[SEG_UCODE] = SEG(STA_X|STA_R, (unsigned)p->mem, p->sz - 1, 3); // sz - 1: do not expose the page after p->mem + p->gdt[SEG_UDATA] = SEG(STA_W, (unsigned)p->mem, p->sz - 1, 3); + p->gdt_pd.pd__garbage = 0; + p->gdt_pd.pd_lim = sizeof(p->gdt) - 1; + p->gdt_pd.pd_base = (unsigned) p->gdt; +} + +extern void trapret(); + +/* + * internal fork(). does not copy kernel stack; instead, + * sets up the stack to return as if from system call. + */ +struct proc * +newproc(struct proc *op) +{ + struct proc *np; + unsigned *sp; + + for(np = &proc[1]; np < &proc[NPROC]; np++) + if(np->state == UNUSED) + break; + if(np >= &proc[NPROC]) + return 0; + + np->sz = op->sz; + np->mem = kalloc(op->sz); + if(np->mem == 0) + return 0; + memcpy(np->mem, op->mem, np->sz); + np->kstack = kalloc(KSTACKSIZE); + if(np->kstack == 0){ + kfree(np->mem, op->sz); + return 0; + } + np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe)); + setupsegs(np); + np->state = RUNNABLE; + + // set up kernel stack to return to user space + *(np->tf) = *(op->tf); + sp = (unsigned *) np->tf; + *(--sp) = (unsigned) &trapret; // for return from swtch() + *(--sp) = 0; // previous bp for leave in swtch() + np->esp = (unsigned) sp; + np->ebp = (unsigned) sp; + + cprintf("esp %x ebp %x mem %x\n", np->esp, np->ebp, np->mem); + + return np; +} + +/* + * find a runnable process and switch to it. 
+ */
+void
+swtch(struct proc *op)
+{
+  struct proc *np;
+
+  // scan the table circularly, starting just after op and looking at
+  // op itself last; keep spinning until some slot is RUNNABLE.
+  while(1){
+    for(np = op + 1; np != op; np++){
+      if(np == &proc[NPROC])
+        np = &proc[0]; // wrap around the end of the table
+      if(np->state == RUNNABLE)
+        break;
+    }
+    if(np->state == RUNNABLE)
+      break;
+    // idle...
+  }
+
+  // remember op's kernel stack so a later swtch() can resume it
+  op->ebp = read_ebp();
+  op->esp = read_esp();
+
+  // XXX callee-saved registers?
+
+  // lgdt needs a memory operand (the limit/base pair that starts at
+  // pd_lim); "g" would also have allowed a register or an immediate.
+  // the "memory" clobber makes sure the whole pseudo-descriptor has
+  // been written out, not just the 16-bit field named as the operand.
+  asm volatile("lgdt %0" : : "m" (np->gdt_pd.pd_lim) : "memory");
+
+  // switch stacks.  both values are in registers before either esp or
+  // ebp changes, and the two moves are one asm statement, so np->ebp
+  // can no longer be fetched through a stack or frame pointer that has
+  // already been replaced.
+  asm volatile("movl %0, %%esp\n\t"
+               "movl %1, %%ebp"
+               : : "r" (np->esp), "r" (np->ebp) : "memory");
+}
diff --git a/proc.h b/proc.h
new file mode 100644
index 0000000..e5c230c
--- /dev/null
+++ b/proc.h
@@ -0,0 +1,34 @@
+/*
+ * p->mem:
+ *   text
+ *   original data and bss
+ *   fixed-size stack
+ *   expandable heap
+ */
+
+/*
+ * segments in proc->gdt
+ */
+#define SEG_KCODE 1 // kernel code
+#define SEG_KDATA 2 // kernel data+stack
+#define SEG_UCODE 3
+#define SEG_UDATA 4
+#define SEG_TSS 5 // this process's task state
+#define NSEGS 6
+
+struct proc{
+  char *mem; // start of process's physical memory
+  unsigned sz; // size of mem in bytes; the kernel stack is not part of it
+  char *kstack; // kernel stack, separate from mem so it doesn't move
+  enum { UNUSED, RUNNABLE, WAITING } state;
+
+  struct Taskstate ts; // only to give cpu address of kernel stack
+  struct Segdesc gdt[NSEGS];
+  struct Pseudodesc gdt_pd;
+  unsigned esp; // kernel stack pointer
+  unsigned ebp; // kernel frame pointer
+
+  struct Trapframe *tf; // points into kstack, used to find user regs
+};
+
+extern struct proc proc[];
diff --git a/sign.pl b/sign.pl
new file mode 100755
index 0000000..d84bdc6
--- /dev/null
+++ b/sign.pl
@@ -0,0 +1,19 @@
+#!/usr/bin/perl
+
+open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!";
+
+$n = sysread(SIG, $buf, 1000);
+
+if($n > 510){
+  print STDERR "boot block too large: $n bytes (max 510)\n";
+  exit 1;
+}
+
+print STDERR "boot block is $n bytes (max 510)\n";
+
+# pad the image with NUL bytes up to 510 bytes, then append the
+# two signature bytes 0x55 0xAA, and write it back over the input file.
+$buf .= "\0" x (510-$n);
+$buf .= "\x55\xAA";
+
+open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!";
+print SIG $buf;
+close SIG;
diff --git a/string.c b/string.c
new file mode 100644
index 0000000..aef4242
--- /dev/null
+++ b/string.c
@@ -0,0 +1,22 @@
+// copy n bytes from src to dst, lowest address first; returns dst.
+void *
+memcpy(void *dst, void *src, unsigned n)
+{
+  char *to = (char *) dst;
+  char *from = (char *) src;
+  unsigned i;
+
+  for(i = 0; i < n; i++)
+    to[i] = from[i];
+
+  return dst;
+}
+
+// store c (converted to char) into each of the n bytes at dst; returns dst.
+void *
+memset(void *dst, int c, unsigned n)
+{
+  char *to = (char *) dst;
+  unsigned i;
+
+  for(i = 0; i < n; i++)
+    to[i] = c;
+
+  return dst;
+}
diff --git a/trapasm.S b/trapasm.S
new file mode 100644
index 0000000..69649ff
--- /dev/null
+++ b/trapasm.S
@@ -0,0 +1,12 @@
+        .text
+        .globl trapret
+        /*
+         * a forked process RETs here
+         * expects ESP to point to a Trapframe
+         */
+trapret:
+        popal
+        popl %es
+        popl %ds
+        addl $0x8, %esp /* trapno and errcode */
+        iret
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..01989d6
--- /dev/null
+++ b/types.h
@@ -0,0 +1,6 @@
+typedef unsigned long long uint64_t;
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+typedef uint32_t uintptr_t;
+typedef uint32_t physaddr_t;
diff --git a/x86.h b/x86.h
new file mode 100644
index 0000000..134c6d2
--- /dev/null
+++ b/x86.h
@@ -0,0 +1,301 @@
+static __inline void breakpoint(void) __attribute__((always_inline));
+static __inline uint8_t inb(int port) __attribute__((always_inline));
+static __inline void insb(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint16_t inw(int port) __attribute__((always_inline));
+static __inline void insw(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint32_t inl(int port) __attribute__((always_inline));
+static __inline void insl(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline void outb(int port, uint8_t data) __attribute__((always_inline));
+static __inline void outsb(int port, const void *addr, int cnt)
__attribute__((always_inline));
+static __inline void outw(int port, uint16_t data) __attribute__((always_inline));
+static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outsl(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outl(int port, uint32_t data) __attribute__((always_inline));
+static __inline void invlpg(void *addr) __attribute__((always_inline));
+static __inline void lidt(void *p) __attribute__((always_inline));
+static __inline void lldt(uint16_t sel) __attribute__((always_inline));
+static __inline void ltr(uint16_t sel) __attribute__((always_inline));
+static __inline void lcr0(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr0(void) __attribute__((always_inline));
+static __inline uint32_t rcr2(void) __attribute__((always_inline));
+static __inline void lcr3(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr3(void) __attribute__((always_inline));
+static __inline void lcr4(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr4(void) __attribute__((always_inline));
+static __inline void tlbflush(void) __attribute__((always_inline));
+static __inline uint32_t read_eflags(void) __attribute__((always_inline));
+static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline));
+static __inline uint32_t read_ebp(void) __attribute__((always_inline));
+static __inline uint32_t read_esp(void) __attribute__((always_inline));
+/* cpuid was the only prototype without always_inline; made consistent. */
+static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp) __attribute__((always_inline));
+static __inline uint64_t read_tsc(void) __attribute__((always_inline));
+
+/* Execute an int3 instruction. */
+static __inline void
+breakpoint(void)
+{
+  __asm __volatile("int3");
+}
+
+/* Read and return one byte from I/O port `port`. */
+static __inline uint8_t
+inb(int port)
+{
+  uint8_t data;
+  __asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port));
+  return data;
+}
+
+/* String input: read cnt bytes from I/O port `port` into memory at addr. */
+static __inline void
+insb(int port, void *addr,
int cnt)
+{
+  /* rep (not repne) is the repeat prefix defined for ins/outs. */
+  __asm __volatile("cld\n\trep\n\tinsb"                 :
+                   "=D" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* Read and return one 16-bit word from I/O port `port`. */
+static __inline uint16_t
+inw(int port)
+{
+  uint16_t data;
+  __asm __volatile("inw %w1,%0" : "=a" (data) : "d" (port));
+  return data;
+}
+
+/* String input: read cnt 16-bit words from `port` into memory at addr. */
+static __inline void
+insw(int port, void *addr, int cnt)
+{
+  __asm __volatile("cld\n\trep\n\tinsw"                 :
+                   "=D" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* Read and return one 32-bit word from I/O port `port`. */
+static __inline uint32_t
+inl(int port)
+{
+  uint32_t data;
+  __asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port));
+  return data;
+}
+
+/* String input: read cnt 32-bit words from `port` into memory at addr. */
+static __inline void
+insl(int port, void *addr, int cnt)
+{
+  __asm __volatile("cld\n\trep\n\tinsl"                 :
+                   "=D" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* Write the byte `data` to I/O port `port`. */
+static __inline void
+outb(int port, uint8_t data)
+{
+  __asm __volatile("outb %0,%w1" : : "a" (data), "d" (port));
+}
+
+/*
+ * String output: write cnt bytes from memory at addr to `port`.
+ * The buffer is read by the instruction but is not an asm operand, so
+ * the "memory" clobber is needed to make the compiler store any pending
+ * writes to it first.
+ */
+static __inline void
+outsb(int port, const void *addr, int cnt)
+{
+  __asm __volatile("cld\n\trep\n\toutsb"                :
+                   "=S" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* Write the 16-bit word `data` to I/O port `port`. */
+static __inline void
+outw(int port, uint16_t data)
+{
+  __asm __volatile("outw %0,%w1" : : "a" (data), "d" (port));
+}
+
+/* String output: write cnt 16-bit words from memory at addr to `port`. */
+static __inline void
+outsw(int port, const void *addr, int cnt)
+{
+  __asm __volatile("cld\n\trep\n\toutsw"                :
+                   "=S" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* String output: write cnt 32-bit words from memory at addr to `port`. */
+static __inline void
+outsl(int port, const void *addr, int cnt)
+{
+  __asm __volatile("cld\n\trep\n\toutsl"                :
+                   "=S" (addr), "=c" (cnt)              :
+                   "d" (port), "0" (addr), "1" (cnt)    :
+                   "memory", "cc");
+}
+
+/* Write the 32-bit word `data` to I/O port `port`. */
+static __inline void
+outl(int port, uint32_t data)
+{
+  __asm __volatile("outl %0,%w1" : : "a" (data), "d" (port));
+}
+
+/* Execute invlpg on the address addr. */
+static __inline void
+invlpg(void *addr)
+{
+  __asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
+}
+
+/* Execute lidt with the memory operand that p points to. */
+static __inline void
+lidt(void *p)
+{
+  __asm __volatile("lidt (%0)" : : "r" (p));
+}
+
+static __inline void
+lldt(uint16_t sel)
+{
+  /* Execute lldt with the selector sel. */
+  __asm __volatile("lldt %0" : : "r" (sel));
+}
+
+/* Execute ltr with the selector sel. */
+static __inline void
+ltr(uint16_t sel)
+{
+  __asm __volatile("ltr %0" : : "r" (sel));
+}
+
+/* Write val to %cr0. */
+static __inline void
+lcr0(uint32_t val)
+{
+  __asm __volatile("movl %0,%%cr0" : : "r" (val));
+}
+
+/* Return the current value of %cr0. */
+static __inline uint32_t
+rcr0(void)
+{
+  uint32_t val;
+  __asm __volatile("movl %%cr0,%0" : "=r" (val));
+  return val;
+}
+
+/* Return the current value of %cr2. */
+static __inline uint32_t
+rcr2(void)
+{
+  uint32_t val;
+  __asm __volatile("movl %%cr2,%0" : "=r" (val));
+  return val;
+}
+
+/* Write val to %cr3. */
+static __inline void
+lcr3(uint32_t val)
+{
+  __asm __volatile("movl %0,%%cr3" : : "r" (val));
+}
+
+/* Return the current value of %cr3. */
+static __inline uint32_t
+rcr3(void)
+{
+  uint32_t val;
+  __asm __volatile("movl %%cr3,%0" : "=r" (val));
+  return val;
+}
+
+/* Write val to %cr4. */
+static __inline void
+lcr4(uint32_t val)
+{
+  __asm __volatile("movl %0,%%cr4" : : "r" (val));
+}
+
+/* Return the current value of %cr4. */
+static __inline uint32_t
+rcr4(void)
+{
+  uint32_t cr4;
+  __asm __volatile("movl %%cr4,%0" : "=r" (cr4));
+  return cr4;
+}
+
+/*
+ * Read %cr3 and write the same value straight back to it
+ * (going by the name, this is done to flush the TLB).
+ */
+static __inline void
+tlbflush(void)
+{
+  uint32_t cr3;
+  __asm __volatile("movl %%cr3,%0" : "=r" (cr3));
+  __asm __volatile("movl %0,%%cr3" : : "r" (cr3));
+}
+
+/* Return the flags register: pushed with pushfl, then popped into the result. */
+static __inline uint32_t
+read_eflags(void)
+{
+  uint32_t eflags;
+  __asm __volatile("pushfl; popl %0" : "=r" (eflags));
+  return eflags;
+}
+
+/* Load the flags register from eflags: the value is pushed, then popfl pops it. */
+static __inline void
+write_eflags(uint32_t eflags)
+{
+  __asm __volatile("pushl %0; popfl" : : "r" (eflags));
+}
+
+/* Return the current value of %ebp. */
+static __inline uint32_t
+read_ebp(void)
+{
+  uint32_t ebp;
+  __asm __volatile("movl %%ebp,%0" : "=r" (ebp));
+  return ebp;
+}
+
+/* Return the current value of %esp. */
+static __inline uint32_t
+read_esp(void)
+{
+  uint32_t esp;
+  __asm __volatile("movl %%esp,%0" : "=r" (esp));
+  return esp;
+}
+
+/*
+ * Execute cpuid with info in %eax.  Each of the four result registers
+ * (%eax, %ebx, %ecx, %edx) is stored through the matching pointer;
+ * a null pointer means the caller does not want that register.
+ */
+static __inline void
+cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
+{
+  uint32_t eax, ebx, ecx, edx;
+  asm volatile("cpuid"
+    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+    : "a" (info));
+  if (eaxp)
+    *eaxp = eax;
+  if (ebxp)
+    *ebxp = ebx;
+  if (ecxp)
+    *ecxp = ecx;
+  if (edxp)
+    *edxp = edx;
+}
+
+/*
+ * Execute rdtsc and return its 64-bit result.
+ * NOTE(review): relies on the "=A" constraint naming the edx:eax pair,
+ * which is how GCC treats it for 32-bit x86 - confirm before building
+ * this for any other target.
+ */
+static __inline uint64_t
+read_tsc(void)
+{
+  uint64_t tsc;
+  __asm __volatile("rdtsc" : "=A" (tsc));
+  return tsc;
+}
+
+/*
+ * General-purpose registers in the order they sit in memory after a
+ * pusha, lowest address first.
+ */
+struct PushRegs {
+  /* registers as pushed by pusha */
+  uint32_t reg_edi;
+  uint32_t reg_esi;
+  uint32_t reg_ebp;
+  uint32_t reg_oesp; /* Useless */
+  uint32_t reg_ebx;
+  uint32_t reg_edx;
+  uint32_t reg_ecx;
+  uint32_t reg_eax;
+};
+
+/*
+ * Saved register state on a kernel stack.  trapret (trapasm.S) unwinds
+ * it from the top down: popal for tf_regs, one popl each for tf_es and
+ * tf_ds (each 16-bit selector is padded to 32 bits), an 8-byte skip over
+ * tf_trapno and tf_err, then iret for the remaining fields.
+ */
+struct Trapframe {
+  struct PushRegs tf_regs;
+  uint16_t tf_es;
+  uint16_t tf_padding1;
+  uint16_t tf_ds;
+  uint16_t tf_padding2;
+  uint32_t tf_trapno;
+  /* below here defined by x86 hardware */
+  uint32_t tf_err;
+  uintptr_t tf_eip;
+  uint16_t tf_cs;
+  uint16_t tf_padding3;
+  uint32_t tf_eflags;
+  /* below here only when crossing rings, such as from user to kernel */
+  uintptr_t tf_esp;
+  uint16_t tf_ss;
+  uint16_t tf_padding4;
+};