commit 55e95b16db458b7f9abeca96e541acbdf8d7f85b Author: rtm Date: Mon Jun 12 15:22:12 2006 +0000 import diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e63c77c --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +OBJS = main.o console.o string.o kalloc.o proc.o trapasm.o + +CC = i386-jos-elf-gcc +LD = i386-jos-elf-ld +OBJCOPY = i386-jos-elf-objcopy +OBJDUMP = i386-jos-elf-objdump + +xv6.img : bootblock kernel + dd if=/dev/zero of=xv6.img count=10000 + dd if=bootblock of=xv6.img conv=notrunc + dd if=kernel of=xv6.img seek=1 conv=notrunc + +bootblock : bootasm.S bootmain.c + $(CC) -O -nostdinc -I. -c bootmain.c + $(CC) -nostdinc -I. -c bootasm.S + $(LD) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o + $(OBJDUMP) -S bootblock.o > bootblock.asm + $(OBJCOPY) -S -O binary bootblock.o bootblock + ./sign.pl bootblock + +kernel : $(OBJS) + $(LD) -Ttext 0x100000 -e main -o kernel $(OBJS) + $(OBJDUMP) -S kernel > kernel.asm + +%.o: %.c + $(CC) -nostdinc -I. -O -c -o $@ $< + +clean : + rm -f bootmain.o bootasm.o bootblock.o bootblock + rm -f kernel main.o kernel.asm xv6.img diff --git a/Notes b/Notes new file mode 100644 index 0000000..e5e2c5f --- /dev/null +++ b/Notes @@ -0,0 +1,67 @@ +bootmain.c doesn't work right if the ELF sections aren't +sector-aligned. so you can't use ld -N. and the sections may also need +to be non-zero length, only really matters for tiny "kernels". + +kernel loaded at 1 megabyte. stack same place that bootasm.S left it. 
+ +kinit() should find real mem size + and rescue useable memory below 1 meg + +no paging, no use of page table hardware, just segments + +no user area: no magic kernel stack mapping + so no copying of kernel stack during fork + though there is a kernel stack page for each process + +no kernel malloc(), just kalloc() for user core + +user pointers aren't valid in the kernel + +setting up first process + we do want a process zero, as template + but not runnable + just set up return-from-trap frame on new kernel stack + fake user program that calls exec + +map text read-only? +shared text? + +what's on the stack during a trap or sys call? + PUSHA before scheduler switch? for callee-saved registers. + segment contents? + what does iret need to get out of the kernel? + how does INT know what kernel stack to use? + +are interrupts turned on in the kernel? probably. + +per-cpu curproc +one tss per process, or one per cpu? +one segment array per cpu, or per process? + +pass curproc explicitly, or implicit from cpu #? + e.g. argument to newproc()? + +test stack expansion +test running out of memory, process slots + +we can't really use a separate stack segment, since stack addresses +need to work correctly as ordinary pointers. the same may be true of +data vs text. how can we have a gap between data and stack, so that +both can grow, without committing 4GB of physical memory? does this +mean we need paging? + +what's the simplest way to add the paging we need? + one page table, re-write it each time we leave the kernel? + page table per process? + probably need to use 0-0xffffffff segments, so that + both data and stack pointers always work + so is it now worth it to make a process's phys mem contiguous? + or could use segment limits and 4 meg pages? + but limits would prevent using stack pointers as data pointers + how to write-protect text? not important? + +perhaps have fixed-size stack, put it in the data segment? 
+ +oops, if kernel stack is in contiguous user phys mem, then moving +users' memory (e.g. to expand it) will wreck any pointers into the +kernel stack. diff --git a/bootasm.S b/bootasm.S new file mode 100644 index 0000000..00cbdc9 --- /dev/null +++ b/bootasm.S @@ -0,0 +1,109 @@ +#define SEG_NULL \ + .word 0, 0; \ + .byte 0, 0, 0, 0 +#define SEG(type,base,lim) \ + .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#define STA_X 0x8 // Executable segment +#define STA_E 0x4 // Expand down (non-executable segments) +#define STA_C 0x4 // Conforming code segment (executable only) +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) +#define STA_A 0x1 // Accessed + +.set PROT_MODE_CSEG,0x8 # code segment selector +.set PROT_MODE_DSEG,0x10 # data segment selector +.set CR0_PE_ON,0x1 # protected mode enable flag + +################################################################################### +# ENTRY POINT +# This code should be stored in the first sector of the hard disk. +# After the BIOS initializes the hardware on startup or system reset, +# it loads this code at physical address 0x7c00 - 0x7d00 (512 bytes). +# Then the BIOS jumps to the beginning of it, address 0x7c00, +# while running in 16-bit real-mode (8086 compatibility mode). +# The Code Segment register (CS) is initially zero on entry. +# +# This code switches into 32-bit protected mode so that all of +# memory can accessed, then calls into C. +################################################################################### + +.globl start # Entry point +start: .code16 # This runs in real mode + cli # Disable interrupts + cld # String operations increment + + # Set up the important data segment registers (DS, ES, SS). 
+ xorw %ax,%ax # Segment number zero + movw %ax,%ds # -> Data Segment + movw %ax,%es # -> Extra Segment + movw %ax,%ss # -> Stack Segment + + # Set up the stack pointer, growing downward from 0x7c00. + movw $start,%sp # Stack Pointer + +#### Enable A20: +#### For fascinating historical reasons (related to the fact that +#### the earliest 8086-based PCs could only address 1MB of physical memory +#### and subsequent 80286-based PCs wanted to retain maximum compatibility), +#### physical address line 20 is tied to low when the machine boots. +#### Obviously this a bit of a drag for us, especially when trying to +#### address memory above 1MB. This code undoes this. + +seta20.1: inb $0x64,%al # Get status + testb $0x2,%al # Busy? + jnz seta20.1 # Yes + movb $0xd1,%al # Command: Write + outb %al,$0x64 # output port +seta20.2: inb $0x64,%al # Get status + testb $0x2,%al # Busy? + jnz seta20.2 # Yes + movb $0xdf,%al # Enable + outb %al,$0x60 # A20 + +#### Switch from real to protected mode +#### The descriptors in our GDT allow all physical memory to be accessed. +#### Furthermore, the descriptors have base addresses of 0, so that the +#### segment translation is a NOP, ie. virtual addresses are identical to +#### their physical addresses. With this setup, immediately after +#### enabling protected mode it will still appear to this code +#### that it is running directly on physical memory with no translation. +#### This initial NOP-translation setup is required by the processor +#### to ensure that the transition to protected mode occurs smoothly. + +real_to_prot: cli # Mandatory since we dont set up an IDT + lgdt gdtdesc # load GDT -- mandatory in protected mode + movl %cr0, %eax # turn on protected mode + orl $CR0_PE_ON, %eax # + movl %eax, %cr0 # + ### CPU magic: jump to relocation, flush prefetch queue, and reload %cs + ### Has the effect of just jmp to the next instruction, but simultaneous + ### loads CS with $PROT_MODE_CSEG. 
+ ljmp $PROT_MODE_CSEG, $protcseg + +#### we are in 32-bit protected mode (hence the .code32) +.code32 +protcseg: + # Set up the protected-mode data segment registers + movw $PROT_MODE_DSEG, %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + movw %ax, %ss # -> SS: Stack Segment + + call cmain # finish the boot load from C. + # cmain() should not return +spin: jmp spin # ..but in case it does, spin + +.p2align 2 # force 4 byte alignment +gdt: + SEG_NULL # null seg + SEG(STA_X|STA_R, 0x0, 0xffffffff) # code seg + SEG(STA_W, 0x0, 0xffffffff) # data seg + +gdtdesc: + .word 0x17 # sizeof(gdt) - 1 + .long gdt # address gdt diff --git a/bootmain.c b/bootmain.c new file mode 100644 index 0000000..79d769c --- /dev/null +++ b/bootmain.c @@ -0,0 +1,121 @@ +#include +#include +#include + +/********************************************************************** + * This a dirt simple boot loader, whose sole job is to boot + * an elf kernel image from the first IDE hard disk. + * + * DISK LAYOUT + * * This program(boot.S and main.c) is the bootloader. It should + * be stored in the first sector of the disk. + * + * * The 2nd sector onward holds the kernel image. + * + * * The kernel image must be in ELF format. + * + * BOOT UP STEPS + * * when the CPU boots it loads the BIOS into memory and executes it + * + * * the BIOS intializes devices, sets of the interrupt routines, and + * reads the first sector of the boot device(e.g., hard-drive) + * into memory and jumps to it. + * + * * Assuming this boot loader is stored in the first sector of the + * hard-drive, this code takes over... + * + * * control starts in bootloader.S -- which sets up protected mode, + * and a stack so C code then run, then calls cmain() + * + * * cmain() in this file takes over, reads in the kernel and jumps to it. 
+ **********************************************************************/ + +#define SECTSIZE 512 +#define ELFHDR ((struct Elf *) 0x10000) // scratch space + +void readsect(void*, uint32_t); +void readseg(uint32_t, uint32_t, uint32_t); + +void +cmain(void) +{ + struct Proghdr *ph, *eph; + + // read 1st page off disk + readseg((uint32_t) ELFHDR, SECTSIZE*8, 0); + + // is this a valid ELF? + if (ELFHDR->e_magic != ELF_MAGIC) + goto bad; + + // load each program segment (ignores ph flags) + ph = (struct Proghdr *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff); + eph = ph + ELFHDR->e_phnum; + for (; ph < eph; ph++) + readseg(ph->p_va, ph->p_memsz, ph->p_offset); + + // call the entry point from the ELF header + // note: does not return! + ((void (*)(void)) (ELFHDR->e_entry & 0xFFFFFF))(); + +bad: + outw(0x8A00, 0x8A00); + outw(0x8A00, 0x8E00); + while (1) + /* do nothing */; +} + +// Read 'count' bytes at 'offset' from kernel into virtual address 'va'. +// Might copy more than asked +void +readseg(uint32_t va, uint32_t count, uint32_t offset) +{ + uint32_t end_va; + + va &= 0xFFFFFF; + end_va = va + count; + + // round down to sector boundary + va &= ~(SECTSIZE - 1); + + // translate from bytes to sectors, and kernel starts at sector 1 + offset = (offset / SECTSIZE) + 1; + + // If this is too slow, we could read lots of sectors at a time. + // We'd write more to memory than asked, but it doesn't matter -- + // we load in increasing order. 
+ while (va < end_va) { + readsect((uint8_t*) va, offset); + va += SECTSIZE; + offset++; + } +} + +void +waitdisk(void) +{ + // wait for disk reaady + while ((inb(0x1F7) & 0xC0) != 0x40) + /* do nothing */; +} + +void +readsect(void *dst, uint32_t offset) +{ + // wait for disk to be ready + waitdisk(); + + outb(0x1F2, 1); // count = 1 + outb(0x1F3, offset); + outb(0x1F4, offset >> 8); + outb(0x1F5, offset >> 16); + outb(0x1F6, (offset >> 24) | 0xE0); + outb(0x1F7, 0x20); // cmd 0x20 - read sectors + + // wait for disk to be ready + waitdisk(); + + // read a sector + insl(0x1F0, dst, SECTSIZE/4); +} + diff --git a/console.c b/console.c new file mode 100644 index 0000000..2035611 --- /dev/null +++ b/console.c @@ -0,0 +1,108 @@ +#include +#include +#include "defs.h" + +void +cons_putc(int c) +{ + int crtport = 0x3d4; // io port of CGA + unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory + int ind; + + // cursor position, 16 bits, col + 80*row + outb(crtport, 14); + ind = inb(crtport + 1) << 8; + outb(crtport, 15); + ind |= inb(crtport + 1); + + c &= 0xff; + + if(c == '\n'){ + ind -= (ind % 80); + ind += 80; + } else { + c |= 0x0700; // black on white + crt[ind] = c; + ind += 1; + } + + if((ind / 80) >= 24){ + // scroll up + memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80)); + ind -= 80; + memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind)); + } + + outb(crtport, 14); + outb(crtport + 1, ind >> 8); + outb(crtport, 15); + outb(crtport + 1, ind); +} + +void +printint(int xx, int base, int sgn) +{ + char buf[16]; + char digits[] = "0123456789ABCDEF"; + int i = 0, neg = 0; + unsigned int x; + + if(sgn && xx < 0){ + neg = 1; + x = 0 - xx; + } else { + x = xx; + } + + do { + buf[i++] = digits[x % base]; + } while((x /= base) != 0); + if(neg) + buf[i++] = '-'; + + while(i > 0){ + i -= 1; + cons_putc(buf[i]); + } +} + +/* + * print to the console. only understands %d and %x. + */ +void +cprintf(char *fmt, ...) 
+{ + int i, state = 0, c; + unsigned int *ap = (unsigned int *) &fmt + 1; + + for(i = 0; fmt[i]; i++){ + c = fmt[i] & 0xff; + if(state == 0){ + if(c == '%'){ + state = '%'; + } else { + cons_putc(c); + } + } else if(state == '%'){ + if(c == 'd'){ + printint(*ap, 10, 1); + ap++; + } else if(c == 'x'){ + printint(*ap, 16, 0); + ap++; + } else if(c == '%'){ + cons_putc(c); + } + state = 0; + } + } +} + +void +panic(char *s) +{ + cprintf(s, 0); + cprintf("\n", 0); + while(1) + ; +} diff --git a/defs.h b/defs.h new file mode 100644 index 0000000..ec41bfe --- /dev/null +++ b/defs.h @@ -0,0 +1,12 @@ +// kalloc.c +char *kalloc(int n); +void kfree(char *cp, int len); + +// console.c +void cprintf(char *fmt, ...); +void panic(char *s); + +// proc.c +struct proc; +void setupsegs(struct proc *p); +struct proc * newproc(struct proc *op); diff --git a/elf.h b/elf.h new file mode 100644 index 0000000..ea9f964 --- /dev/null +++ b/elf.h @@ -0,0 +1,43 @@ +#ifndef JOS_INC_ELF_H +#define JOS_INC_ELF_H + +#define ELF_MAGIC 0x464C457FU /* "\x7FELF" in little endian */ + +struct Elf { + uint32_t e_magic; // must equal ELF_MAGIC + uint8_t e_elf[12]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +}; + +struct Proghdr { + uint32_t p_type; + uint32_t p_offset; + uint32_t p_va; + uint32_t p_pa; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +}; + +// Values for Proghdr::p_type +#define ELF_PROG_LOAD 1 + +// Flag bits for Proghdr::p_flags +#define ELF_PROG_FLAG_EXEC 1 +#define ELF_PROG_FLAG_WRITE 2 +#define ELF_PROG_FLAG_READ 4 + +#endif /* !JOS_INC_ELF_H */ diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..5ea38fd --- /dev/null +++ b/kalloc.c @@ -0,0 +1,158 @@ +/* + * physical memory allocator, intended to be used 
to allocate + * memory for user processes. allocates in 4096-byte "pages". + * free list is sorted and combines adjacent pages into + * long runs, to make it easier to allocate big segments. + * one reason the page size is 4k is that the x86 segment size + * granularity is 4k. + */ + +#include "param.h" +#include "types.h" +#include "defs.h" + +struct run { + struct run *next; + int len; // bytes +}; +struct run *freelist; + +void ktest(); + +/* + * initialize free list of physical pages. this code + * cheats by just considering the one megabyte of pages + * after _end. + */ +void +kinit() +{ + extern int end; + unsigned mem; + char *start; + + start = (char *) &end; + start = (char *) (((unsigned)start + PAGE) & ~(PAGE-1)); + mem = 256; // XXX + cprintf("mem = %d\n", mem * PAGE); + kfree(start, mem * PAGE); + ktest(); +} + +void +kfree(char *cp, int len) +{ + struct run **rr; + struct run *p = (struct run *) cp; + struct run *pend = (struct run *) (cp + len); + + if(len % PAGE) + panic("kfree"); + + rr = &freelist; + while(*rr){ + struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len); + if(p >= *rr && p < rend) + panic("freeing free page"); + if(pend == *rr){ + p->len = len + (*rr)->len; + p->next = (*rr)->next; + *rr = p; + return; + } + if(pend < *rr){ + p->len = len; + p->next = *rr; + *rr = p; + return; + } + if(p == rend){ + (*rr)->len += len; + if((*rr)->next && (*rr)->next == pend){ + (*rr)->len += (*rr)->next->len; + (*rr)->next = (*rr)->next->next; + } + return; + } + rr = &((*rr)->next); + } + p->len = len; + p->next = 0; + *rr = p; +} + +/* + * allocate n bytes of physical memory. + * returns a kernel-segment pointer. + * returns 0 if there's no run that's big enough. 
+ */ +char * +kalloc(int n) +{ + struct run **rr; + + if(n % PAGE) + panic("kalloc"); + + rr = &freelist; + while(*rr){ + struct run *r = *rr; + if(r->len == n){ + *rr = r->next; + return (char *) r; + } + if(r->len > n){ + char *p = (char *)r + (r->len - n); + r->len -= n; + return p; + } + rr = &(*rr)->next; + } + return 0; +} + +void +ktest() +{ + char *p1, *p2, *p3; + + // test coalescing + p1 = kalloc(4 * PAGE); + kfree(p1 + 3*PAGE, PAGE); + kfree(p1 + 2*PAGE, PAGE); + kfree(p1, PAGE); + kfree(p1 + PAGE, PAGE); + p2 = kalloc(4 * PAGE); + if(p2 != p1) + panic("ktest"); + kfree(p2, 4 * PAGE); + + // test finding first run that fits + p1 = kalloc(1 * PAGE); + p2 = kalloc(1 * PAGE); + kfree(p1, PAGE); + p3 = kalloc(2 * PAGE); + kfree(p2, PAGE); + kfree(p3, 2 * PAGE); + + // test running out of memory + p1 = 0; + while(1){ + p2 = kalloc(PAGE); + if(p2 == 0) + break; + *(char **)p2 = p1; + p1 = p2; + } + while(p1){ + p2 = *(char **)p1; + kfree(p1, PAGE); + p1 = p2; + } + p1 = kalloc(PAGE * 20); + if(p1 == 0) + panic("ktest2"); + kfree(p1, PAGE * 20); + + cprintf("ktest ok\n"); +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..a1c08c9 --- /dev/null +++ b/main.c @@ -0,0 +1,40 @@ +#include "types.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" + +char junk1[20000]; +char junk2[20000] = { 1 }; + +main() +{ + struct proc *p; + + cprintf("\nxV6\n\n"); + + // initialize physical memory allocator + kinit(); + + // create fake process zero + p = &proc[0]; + p->state = WAITING; + p->sz = PAGE; + p->mem = kalloc(p->sz); + memset(p->mem, 0, p->sz); + p->kstack = kalloc(KSTACKSIZE); + p->tf = (struct Trapframe *) (p->kstack + KSTACKSIZE - sizeof(struct Trapframe)); + memset(p->tf, 0, sizeof(struct Trapframe)); + p->tf->tf_es = p->tf->tf_ds = p->tf->tf_ss = (SEG_UDATA << 3) | 3; + p->tf->tf_cs = (SEG_UCODE << 3) | 3; + p->tf->tf_eflags = FL_IF; + setupsegs(p); + + p = newproc(&proc[0]); + // xxx copy instructions 
to p->mem + p->tf->tf_eip = 0; + p->tf->tf_esp = p->sz; + + swtch(&proc[0]); +} diff --git a/mmu.h b/mmu.h new file mode 100644 index 0000000..776db23 --- /dev/null +++ b/mmu.h @@ -0,0 +1,308 @@ +/* + * This file contains definitions for the x86 memory management unit (MMU), + * including paging- and segmentation-related data structures and constants, + * the %cr0, %cr4, and %eflags registers, and traps. + */ + +/* + * + * Part 1. Paging data structures and constants. + * + */ + +// A linear address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory | Page Table | Offset within Page | +// | Index | Index | | +// +----------------+----------------+---------------------+ +// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ +// \----------- PPN(la) -----------/ +// +// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. +// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), +// use PGADDR(PDX(la), PTX(la), PGOFF(la)). + +// page number field of address +#define PPN(la) (((uintptr_t) (la)) >> PTXSHIFT) +#define VPN(la) PPN(la) // used to index into vpt[] + +// page directory index +#define PDX(la) ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF) +#define VPD(la) PDX(la) // used to index into vpd[] + +// page table index +#define PTX(la) ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF) + +// offset in page +#define PGOFF(la) (((uintptr_t) (la)) & 0xFFF) + +// construct linear address from indexes and offset +#define PGADDR(d, t, o) ((void*) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// Page directory and page table constants. 
+#define NPDENTRIES 1024 // page directory entries per page directory +#define NPTENTRIES 1024 // page table entries per page table + +#define PGSIZE 4096 // bytes mapped by a page +#define PGSHIFT 12 // log2(PGSIZE) + +#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry +#define PTSHIFT 22 // log2(PTSIZE) + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +// Page table/directory entry flags. +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PWT 0x008 // Write-Through +#define PTE_PCD 0x010 // Cache-Disable +#define PTE_A 0x020 // Accessed +#define PTE_D 0x040 // Dirty +#define PTE_PS 0x080 // Page Size +#define PTE_MBZ 0x180 // Bits must be zero + +// The PTE_AVAIL bits aren't used by the kernel or interpreted by the +// hardware, so user processes are allowed to set them arbitrarily. +#define PTE_AVAIL 0xE00 // Available for software use + +// Only flags in PTE_USER may be used in system calls. 
+#define PTE_USER (PTE_AVAIL | PTE_P | PTE_W | PTE_U) + +// address in page table entry +#define PTE_ADDR(pte) ((physaddr_t) (pte) & ~0xFFF) + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_MP 0x00000002 // Monitor coProcessor +#define CR0_EM 0x00000004 // Emulation +#define CR0_TS 0x00000008 // Task Switched +#define CR0_ET 0x00000010 // Extension Type +#define CR0_NE 0x00000020 // Numeric Errror +#define CR0_WP 0x00010000 // Write Protect +#define CR0_AM 0x00040000 // Alignment Mask +#define CR0_NW 0x20000000 // Not Writethrough +#define CR0_CD 0x40000000 // Cache Disable +#define CR0_PG 0x80000000 // Paging + +#define CR4_PCE 0x00000100 // Performance counter enable +#define CR4_MCE 0x00000040 // Machine Check Enable +#define CR4_PSE 0x00000010 // Page Size Extensions +#define CR4_DE 0x00000008 // Debugging Extensions +#define CR4_TSD 0x00000004 // Time Stamp Disable +#define CR4_PVI 0x00000002 // Protected-Mode Virtual Interrupts +#define CR4_VME 0x00000001 // V86 Mode Extensions + +// Eflags register +#define FL_CF 0x00000001 // Carry Flag +#define FL_PF 0x00000004 // Parity Flag +#define FL_AF 0x00000010 // Auxiliary carry Flag +#define FL_ZF 0x00000040 // Zero Flag +#define FL_SF 0x00000080 // Sign Flag +#define FL_TF 0x00000100 // Trap Flag +#define FL_IF 0x00000200 // Interrupt Flag +#define FL_DF 0x00000400 // Direction Flag +#define FL_OF 0x00000800 // Overflow Flag +#define FL_IOPL_MASK 0x00003000 // I/O Privilege Level bitmask +#define FL_IOPL_0 0x00000000 // IOPL == 0 +#define FL_IOPL_1 0x00001000 // IOPL == 1 +#define FL_IOPL_2 0x00002000 // IOPL == 2 +#define FL_IOPL_3 0x00003000 // IOPL == 3 +#define FL_NT 0x00004000 // Nested Task +#define FL_RF 0x00010000 // Resume Flag +#define FL_VM 0x00020000 // Virtual 8086 mode +#define FL_AC 0x00040000 // Alignment Check +#define FL_VIF 0x00080000 // Virtual Interrupt Flag +#define FL_VIP 0x00100000 // Virtual Interrupt Pending +#define FL_ID 0x00200000 // ID flag + 
+// Page fault error codes +#define FEC_PR 0x1 // Page fault caused by protection violation +#define FEC_WR 0x2 // Page fault caused by a write +#define FEC_U 0x4 // Page fault occured while in user mode + + +/* + * + * Part 2. Segmentation data structures and constants. + * + */ + +#ifdef __ASSEMBLER__ + +/* + * Macros to build GDT entries in assembly. + */ +#define SEG_NULL \ + .word 0, 0; \ + .byte 0, 0, 0, 0 +#define SEG(type,base,lim) \ + .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#else // not __ASSEMBLER__ + +// Segment Descriptors +struct Segdesc { + unsigned sd_lim_15_0 : 16; // Low bits of segment limit + unsigned sd_base_15_0 : 16; // Low bits of segment base address + unsigned sd_base_23_16 : 8; // Middle bits of segment base address + unsigned sd_type : 4; // Segment type (see STS_ constants) + unsigned sd_s : 1; // 0 = system, 1 = application + unsigned sd_dpl : 2; // Descriptor Privilege Level + unsigned sd_p : 1; // Present + unsigned sd_lim_19_16 : 4; // High bits of segment limit + unsigned sd_avl : 1; // Unused (available for software use) + unsigned sd_rsv1 : 1; // Reserved + unsigned sd_db : 1; // 0 = 16-bit segment, 1 = 32-bit segment + unsigned sd_g : 1; // Granularity: limit scaled by 4K when set + unsigned sd_base_31_24 : 8; // High bits of segment base address +}; +// Null segment +#define SEG_NULL (struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +// Segment that is loadable but faults when used +#define SEG_FAULT (struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 } +// Normal segment +#define SEG(type, base, lim, dpl) (struct Segdesc) \ +{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \ + type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1, \ + (unsigned) (base) >> 24 } +#define SEG16(type, base, lim, dpl) (struct Segdesc) \ +{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \ + 
type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0, \ + (unsigned) (base) >> 24 } + +#endif /* !__ASSEMBLER__ */ + +// Application segment type bits +#define STA_X 0x8 // Executable segment +#define STA_E 0x4 // Expand down (non-executable segments) +#define STA_C 0x4 // Conforming code segment (executable only) +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) +#define STA_A 0x1 // Accessed + +// System segment type bits +#define STS_T16A 0x1 // Available 16-bit TSS +#define STS_LDT 0x2 // Local Descriptor Table +#define STS_T16B 0x3 // Busy 16-bit TSS +#define STS_CG16 0x4 // 16-bit Call Gate +#define STS_TG 0x5 // Task Gate / Coum Transmitions +#define STS_IG16 0x6 // 16-bit Interrupt Gate +#define STS_TG16 0x7 // 16-bit Trap Gate +#define STS_T32A 0x9 // Available 32-bit TSS +#define STS_T32B 0xB // Busy 32-bit TSS +#define STS_CG32 0xC // 32-bit Call Gate +#define STS_IG32 0xE // 32-bit Interrupt Gate +#define STS_TG32 0xF // 32-bit Trap Gate + + +/* + * + * Part 3. Traps. 
+ * + */ + +#ifndef __ASSEMBLER__ + +// Task state segment format (as described by the Pentium architecture book) +struct Taskstate { + uint32_t ts_link; // Old ts selector + uintptr_t ts_esp0; // Stack pointers and segment selectors + uint16_t ts_ss0; // after an increase in privilege level + uint16_t ts_padding1; + uintptr_t ts_esp1; + uint16_t ts_ss1; + uint16_t ts_padding2; + uintptr_t ts_esp2; + uint16_t ts_ss2; + uint16_t ts_padding3; + physaddr_t ts_cr3; // Page directory base + uintptr_t ts_eip; // Saved state from last task switch + uint32_t ts_eflags; + uint32_t ts_eax; // More saved state (registers) + uint32_t ts_ecx; + uint32_t ts_edx; + uint32_t ts_ebx; + uintptr_t ts_esp; + uintptr_t ts_ebp; + uint32_t ts_esi; + uint32_t ts_edi; + uint16_t ts_es; // Even more saved state (segment selectors) + uint16_t ts_padding4; + uint16_t ts_cs; + uint16_t ts_padding5; + uint16_t ts_ss; + uint16_t ts_padding6; + uint16_t ts_ds; + uint16_t ts_padding7; + uint16_t ts_fs; + uint16_t ts_padding8; + uint16_t ts_gs; + uint16_t ts_padding9; + uint16_t ts_ldt; + uint16_t ts_padding10; + uint16_t ts_t; // Trap on task switch + uint16_t ts_iomb; // I/O map base address +}; + +// Gate descriptors for interrupts and traps +struct Gatedesc { + unsigned gd_off_15_0 : 16; // low 16 bits of offset in segment + unsigned gd_ss : 16; // segment selector + unsigned gd_args : 5; // # args, 0 for interrupt/trap gates + unsigned gd_rsv1 : 3; // reserved(should be zero I guess) + unsigned gd_type : 4; // type(STS_{TG,IG32,TG32}) + unsigned gd_s : 1; // must be 0 (system) + unsigned gd_dpl : 2; // descriptor(meaning new) privilege level + unsigned gd_p : 1; // Present + unsigned gd_off_31_16 : 16; // high bits of offset in segment +}; + +// Set up a normal interrupt/trap gate descriptor. +// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. 
+// - sel: Code segment selector for interrupt/trap handler +// - off: Offset in code segment for interrupt/trap handler +// - dpl: Descriptor Privilege Level - +// the privilege level required for software to invoke +// this interrupt/trap gate explicitly using an int instruction. +#define SETGATE(gate, istrap, sel, off, dpl) \ +{ \ + (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ + (gate).gd_ss = (sel); \ + (gate).gd_args = 0; \ + (gate).gd_rsv1 = 0; \ + (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32; \ + (gate).gd_s = 0; \ + (gate).gd_dpl = (dpl); \ + (gate).gd_p = 1; \ + (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ +} + +// Set up a call gate descriptor. +#define SETCALLGATE(gate, ss, off, dpl) \ +{ \ + (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ + (gate).gd_ss = (ss); \ + (gate).gd_args = 0; \ + (gate).gd_rsv1 = 0; \ + (gate).gd_type = STS_CG32; \ + (gate).gd_s = 0; \ + (gate).gd_dpl = (dpl); \ + (gate).gd_p = 1; \ + (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ +} + +// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions. +struct Pseudodesc { + uint16_t pd__garbage; // LGDT supposed to be from address 4N+2 + uint16_t pd_lim; // Limit + uint32_t pd_base __attribute__ ((packed)); // Base address +}; +#define PD_ADDR(desc) (&(desc).pd_lim) + +#endif /* !__ASSEMBLER__ */ + diff --git a/param.h b/param.h new file mode 100644 index 0000000..798dc5b --- /dev/null +++ b/param.h @@ -0,0 +1,3 @@ +#define NPROC 64 +#define PAGE 4096 +#define KSTACKSIZE PAGE diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..fda834e --- /dev/null +++ b/proc.c @@ -0,0 +1,112 @@ +#include "types.h" +#include "mmu.h" +#include "x86.h" +#include "proc.h" +#include "param.h" +#include "defs.h" + +struct proc proc[NPROC]; + +/* + * set up a process's task state and segment descriptors + * correctly, given its current size and address in memory. + * this should be called whenever the latter change. + * doesn't change the cpu's current segmentation setup. 
+ */ +void +setupsegs(struct proc *p) +{ + memset(&p->ts, 0, sizeof(struct Taskstate)); + p->ts.ts_ss0 = SEG_KDATA << 3; + p->ts.ts_esp0 = (unsigned)(p->kstack + KSTACKSIZE); + + memset(&p->gdt, 0, sizeof(p->gdt)); + p->gdt[0] = SEG_NULL; + p->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); + p->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + p->gdt[SEG_TSS] = SEG16(STS_T32A, (unsigned) &p->ts, sizeof(p->ts), 0); + p->gdt[SEG_TSS].sd_s = 0; + p->gdt[SEG_UCODE] = SEG(STA_X|STA_R, (unsigned)p->mem, p->sz, 3); + p->gdt[SEG_UDATA] = SEG(STA_W, (unsigned)p->mem, p->sz, 3); + p->gdt_pd.pd__garbage = 0; + p->gdt_pd.pd_lim = sizeof(p->gdt) - 1; + p->gdt_pd.pd_base = (unsigned) p->gdt; +} + +extern void trapret(); + +/* + * internal fork(). does not copy kernel stack; instead, + * sets up the stack to return as if from system call. + */ +struct proc * +newproc(struct proc *op) +{ + struct proc *np; + unsigned *sp; + + for(np = &proc[1]; np < &proc[NPROC]; np++) + if(np->state == UNUSED) + break; + if(np >= &proc[NPROC]) + return 0; + + np->sz = op->sz; + np->mem = kalloc(op->sz); + if(np->mem == 0) + return 0; + memcpy(np->mem, op->mem, np->sz); + np->kstack = kalloc(KSTACKSIZE); + if(np->kstack == 0){ + kfree(np->mem, op->sz); + return 0; + } + np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe)); + setupsegs(np); + np->state = RUNNABLE; + + // set up kernel stack to return to user space + *(np->tf) = *(op->tf); + sp = (unsigned *) np->tf; + *(--sp) = (unsigned) &trapret; // for return from swtch() + *(--sp) = 0; // previous bp for leave in swtch() + np->esp = (unsigned) sp; + np->ebp = (unsigned) sp; + + cprintf("esp %x ebp %x mem %x\n", np->esp, np->ebp, np->mem); + + return np; +} + +/* + * find a runnable process and switch to it. 
+ */ +void +swtch(struct proc *op) +{ + struct proc *np; + + while(1){ + for(np = op + 1; np != op; np++){ + if(np == &proc[NPROC]) + np = &proc[0]; + if(np->state == RUNNABLE) + break; + } + if(np->state == RUNNABLE) + break; + // idle... + } + + op->ebp = read_ebp(); + op->esp = read_esp(); + + // XXX callee-saved registers? + + // this happens to work, but probably isn't safe: + // it's not clear that np->ebp will evaluate + // correctly after changing the stack pointer. + asm volatile("lgdt %0" : : "g" (np->gdt_pd.pd_lim)); + asm volatile("movl %0, %%esp" : : "g" (np->esp)); + asm volatile("movl %0, %%ebp" : : "g" (np->ebp)); +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..e5c230c --- /dev/null +++ b/proc.h @@ -0,0 +1,34 @@ +/* + * p->mem: + * text + * original data and bss + * fixed-size stack + * expandable heap + */ + +/* + * segments in proc->gdt + */ +#define SEG_KCODE 1 // kernel code +#define SEG_KDATA 2 // kernel data+stack +#define SEG_UCODE 3 +#define SEG_UDATA 4 +#define SEG_TSS 5 // this process's task state +#define NSEGS 6 + +struct proc{ + char *mem; // start of process's physical memory + unsigned sz; // total size of mem, including kernel stack + char *kstack; // kernel stack, separate from mem so it doesn't move + enum { UNUSED, RUNNABLE, WAITING } state; + + struct Taskstate ts; // only to give cpu address of kernel stack + struct Segdesc gdt[NSEGS]; + struct Pseudodesc gdt_pd; + unsigned esp; // kernel stack pointer + unsigned ebp; // kernel frame pointer + + struct Trapframe *tf; // points into kstack, used to find user regs +}; + +extern struct proc proc[]; diff --git a/sign.pl b/sign.pl new file mode 100755 index 0000000..d84bdc6 --- /dev/null +++ b/sign.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl + +open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; + +$n = sysread(SIG, $buf, 1000); + +if($n > 510){ + print STDERR "boot block too large: $n bytes (max 510)\n"; + exit 1; +} + +print STDERR "boot block is $n bytes (max 510)\n"; + 
/*
 * Minimal freestanding replacements for the C library's memcpy and
 * memset, for kernel code built with -nostdinc (no libc available).
 */

/*
 * Copy n bytes from src to dst, front to back.
 * The regions must not overlap (this is memcpy, not memmove).
 * src is const-qualified: the function never writes through it, and
 * this matches the standard memcpy contract while remaining
 * backward compatible for all existing callers.
 * Returns dst, as the standard memcpy does.
 */
void *
memcpy(void *dst, const void *src, unsigned n)
{
  char *d = (char *) dst;
  const char *s = (const char *) src;

  while(n-- > 0)
    *d++ = *s++;

  return dst;
}

/*
 * Fill the first n bytes of dst with the byte value (char)c.
 * c is taken as int to match the standard memset signature.
 * Returns dst, as the standard memset does.
 */
void *
memset(void *dst, int c, unsigned n)
{
  char *d = (char *) dst;

  while(n-- > 0)
    *d++ = c;

  return dst;
}
__attribute__((always_inline));

/*
 * Forward declarations for the remaining inline helpers defined below.
 * always_inline forces these wrappers to be expanded even at -O0.
 */
static __inline void outw(int port, uint16_t data) __attribute__((always_inline));
static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline));
static __inline void outsl(int port, const void *addr, int cnt) __attribute__((always_inline));
static __inline void outl(int port, uint32_t data) __attribute__((always_inline));
static __inline void invlpg(void *addr) __attribute__((always_inline));
static __inline void lidt(void *p) __attribute__((always_inline));
static __inline void lldt(uint16_t sel) __attribute__((always_inline));
static __inline void ltr(uint16_t sel) __attribute__((always_inline));
static __inline void lcr0(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr0(void) __attribute__((always_inline));
static __inline uint32_t rcr2(void) __attribute__((always_inline));
static __inline void lcr3(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr3(void) __attribute__((always_inline));
static __inline void lcr4(uint32_t val) __attribute__((always_inline));
static __inline uint32_t rcr4(void) __attribute__((always_inline));
static __inline void tlbflush(void) __attribute__((always_inline));
static __inline uint32_t read_eflags(void) __attribute__((always_inline));
static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline));
static __inline uint32_t read_ebp(void) __attribute__((always_inline));
static __inline uint32_t read_esp(void) __attribute__((always_inline));
static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp);
static __inline uint64_t read_tsc(void) __attribute__((always_inline));

/* Trigger a debug breakpoint (int3, vector 3). */
static __inline void
breakpoint(void)
{
	__asm __volatile("int3");
}

/* Read one byte from I/O port 'port'. */
static __inline uint8_t
inb(int port)
{
	uint8_t data;
	__asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port));
	return data;
}

/* Read cnt bytes from I/O port 'port' into memory at addr (string
 * input); the "memory" clobber tells the compiler the buffer changed. */
static __inline void
insb(int port, void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\tinsb" :
			 "=D" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "memory", "cc");
}
/* Read one 16-bit word from I/O port 'port'. */
static __inline uint16_t
inw(int port)
{
	uint16_t data;
	__asm __volatile("inw %w1,%0" : "=a" (data) : "d" (port));
	return data;
}

/* Read cnt 16-bit words from 'port' into addr.
 * NOTE(review): "repne" is the historical idiom here; "rep" is the
 * conventional prefix for ins/outs — confirm intended, though CPUs
 * treat them the same for these instructions. */
static __inline void
insw(int port, void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\tinsw" :
			 "=D" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "memory", "cc");
}

/* Read one 32-bit longword from I/O port 'port'. */
static __inline uint32_t
inl(int port)
{
	uint32_t data;
	__asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port));
	return data;
}

/* Read cnt 32-bit longwords from 'port' into addr. */
static __inline void
insl(int port, void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\tinsl" :
			 "=D" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "memory", "cc");
}

/* Write one byte to I/O port 'port'. */
static __inline void
outb(int port, uint8_t data)
{
	__asm __volatile("outb %0,%w1" : : "a" (data), "d" (port));
}

/* Write cnt bytes from addr to I/O port 'port' (string output). */
static __inline void
outsb(int port, const void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\toutsb" :
			 "=S" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "cc");
}

/* Write one 16-bit word to I/O port 'port'. */
static __inline void
outw(int port, uint16_t data)
{
	__asm __volatile("outw %0,%w1" : : "a" (data), "d" (port));
}

/* Write cnt 16-bit words from addr to I/O port 'port'. */
static __inline void
outsw(int port, const void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\toutsw" :
			 "=S" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "cc");
}

/* Write cnt 32-bit longwords from addr to I/O port 'port'. */
static __inline void
outsl(int port, const void *addr, int cnt)
{
	__asm __volatile("cld\n\trepne\n\toutsl" :
			 "=S" (addr), "=c" (cnt) :
			 "d" (port), "0" (addr), "1" (cnt) :
			 "cc");
}

/* Write one 32-bit longword to I/O port 'port'. */
static __inline void
outl(int port, uint32_t data)
{
	__asm __volatile("outl %0,%w1" : : "a" (data), "d" (port));
}

/* Invalidate the TLB entry for the page containing addr. */
static __inline void
invlpg(void *addr)
{
	__asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
}

/* Load the IDT register from the pseudo-descriptor at p. */
static __inline void
lidt(void *p)
{
	__asm __volatile("lidt (%0)" : : "r" (p));
}

static __inline void
/* Load the local descriptor table register with selector 'sel'. */
lldt(uint16_t sel)
{
	__asm __volatile("lldt %0" : : "r" (sel));
}

/* Load the task register (selects the active TSS) with selector 'sel'. */
static __inline void
ltr(uint16_t sel)
{
	__asm __volatile("ltr %0" : : "r" (sel));
}

/* Write control register CR0. */
static __inline void
lcr0(uint32_t val)
{
	__asm __volatile("movl %0,%%cr0" : : "r" (val));
}

/* Read control register CR0. */
static __inline uint32_t
rcr0(void)
{
	uint32_t val;
	__asm __volatile("movl %%cr0,%0" : "=r" (val));
	return val;
}

/* Read CR2 (the faulting linear address after a page fault). */
static __inline uint32_t
rcr2(void)
{
	uint32_t val;
	__asm __volatile("movl %%cr2,%0" : "=r" (val));
	return val;
}

/* Write CR3 (page-directory base register). */
static __inline void
lcr3(uint32_t val)
{
	__asm __volatile("movl %0,%%cr3" : : "r" (val));
}

/* Read CR3. */
static __inline uint32_t
rcr3(void)
{
	uint32_t val;
	__asm __volatile("movl %%cr3,%0" : "=r" (val));
	return val;
}

/* Write CR4. */
static __inline void
lcr4(uint32_t val)
{
	__asm __volatile("movl %0,%%cr4" : : "r" (val));
}

/* Read CR4. */
static __inline uint32_t
rcr4(void)
{
	uint32_t cr4;
	__asm __volatile("movl %%cr4,%0" : "=r" (cr4));
	return cr4;
}

/* Flush the TLB by rewriting CR3 with its current value. */
static __inline void
tlbflush(void)
{
	uint32_t cr3;
	__asm __volatile("movl %%cr3,%0" : "=r" (cr3));
	__asm __volatile("movl %0,%%cr3" : : "r" (cr3));
}

/* Read the EFLAGS register (pushfl then pop into a GPR). */
static __inline uint32_t
read_eflags(void)
{
	uint32_t eflags;
	__asm __volatile("pushfl; popl %0" : "=r" (eflags));
	return eflags;
}

/* Write the EFLAGS register (push a GPR then popfl). */
static __inline void
write_eflags(uint32_t eflags)
{
	__asm __volatile("pushl %0; popfl" : : "r" (eflags));
}

/* Read the current frame pointer (%ebp). */
static __inline uint32_t
read_ebp(void)
{
	uint32_t ebp;
	__asm __volatile("movl %%ebp,%0" : "=r" (ebp));
	return ebp;
}

/* Read the current stack pointer (%esp). */
static __inline uint32_t
read_esp(void)
{
	uint32_t esp;
	__asm __volatile("movl %%esp,%0" : "=r" (esp));
	return esp;
}

/* Execute CPUID with eax = info; store the resulting eax/ebx/ecx/edx
 * through whichever out-pointers are non-NULL. */
static __inline void
cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
{
	uint32_t eax, ebx, ecx, edx;
	asm volatile("cpuid"
		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
		: "a" (info));
	if (eaxp)
		*eaxp = eax;
	if (ebxp)
		*ebxp = ebx;
	if (ecxp)
		*ecxp = ecx;
	if (edxp)
		*edxp = edx;
}
/* Read the 64-bit timestamp counter; rdtsc returns it in edx:eax,
 * which the "=A" constraint maps onto a uint64_t. */
static __inline uint64_t
read_tsc(void)
{
	uint64_t tsc;
	__asm __volatile("rdtsc" : "=A" (tsc));
	return tsc;
}

/* General-purpose register block in the exact layout produced by
 * pusha / consumed by popal (lowest stack address first). */
struct PushRegs {
	/* registers as pushed by pusha */
	uint32_t reg_edi;
	uint32_t reg_esi;
	uint32_t reg_ebp;
	uint32_t reg_oesp;		/* Useless */
	uint32_t reg_ebx;
	uint32_t reg_edx;
	uint32_t reg_ecx;
	uint32_t reg_eax;
};

/* Kernel-stack trap frame: the software-pushed part (registers,
 * es/ds, trap number) followed by what the x86 pushes on an
 * interrupt or trap.  The layout must match the pops in trapret
 * (trapasm.S): popal, popl %es, popl %ds, addl $8, iret. */
struct Trapframe {
	struct PushRegs tf_regs;
	uint16_t tf_es;
	uint16_t tf_padding1;	/* padding: segment pushes occupy 32 bits */
	uint16_t tf_ds;
	uint16_t tf_padding2;
	uint32_t tf_trapno;
	/* below here defined by x86 hardware */
	uint32_t tf_err;
	uintptr_t tf_eip;
	uint16_t tf_cs;
	uint16_t tf_padding3;
	uint32_t tf_eflags;
	/* below here only when crossing rings, such as from user to kernel */
	uintptr_t tf_esp;
	uint16_t tf_ss;
	uint16_t tf_padding4;
};