From 1d48c0148e1f1c6fcd8b6dafc39cf199926dbeff Mon Sep 17 00:00:00 2001
From: Ben Gras
Date: Fri, 13 Jul 2012 00:54:27 +0200
Subject: [PATCH] segmentless smp fixes

adjust the smp booting procedure for segmentless operation.  the
changes are mostly due to the gdt and idt now depending on paging
(because of the kernel's high location), and paging therefore being
enabled much earlier.

also smaller fixes: redefine DESC_SIZE, fix a kernel makefile variable
name (cross-compiling), add some null pointer checks for dereferences
that trap now because of a sparser pagetable, and add acpi sanity
checking.
---
 include/arch/i386/include/archtypes.h |    8 ++
 kernel/Makefile                       |    2 +-
 kernel/arch/i386/acpi.c               |   32 ++--
 kernel/arch/i386/apic.c               |   17 +++-
 kernel/arch/i386/arch_clock.c         |    9 +-
 kernel/arch/i386/arch_smp.c           |  120 +++++++++++++-------------
 kernel/arch/i386/include/arch_proto.h |    3 +-
 kernel/arch/i386/include/archconst.h  |    2 +
 kernel/arch/i386/memory.c             |   11 ++-
 kernel/arch/i386/mpx.S                |   23 ++---
 kernel/arch/i386/pg_utils.c           |   68 +++++++++++++--
 kernel/arch/i386/pre_init.c           |   18 +---
 kernel/arch/i386/protect.c            |   55 ++++++------
 kernel/arch/i386/trampoline.S         |   28 +++++-
 kernel/glo.h                          |    2 +-
 kernel/main.c                         |    6 ++
 16 files changed, 259 insertions(+), 145 deletions(-)
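
A note on the new struct gatedesc_s (moved into archtypes.h below so
that arch_smp.c can declare the AP's idt copy): the terse |P|DL|0|TYPE|
comment describes how the p_dpl_type byte packs the present bit, the
descriptor privilege level and the gate type.  As a sketch of that
packing (set_gate() is illustrative only; it is not a function in this
patch, and the kernel loads its real tables through idt_init() and
idt_reload() as seen in prot_load_selectors()):

	/* Pack a 32-bit handler address and the access byte into a gate.
	 * 0x80 sets P (present); dpl lands in bits 5..6; type 0x0e is a
	 * 32-bit interrupt gate.
	 */
	static void set_gate(struct gatedesc_s *g, u32_t handler, u8_t dpl)
	{
		g->offset_low  = handler & 0xffff;	/* handler bits 0..15 */
		g->offset_high = handler >> 16;		/* handler bits 16..31 */
		g->selector    = KERN_CS_SELECTOR;	/* kernel code segment */
		g->pad         = 0;
		g->p_dpl_type  = 0x80 | ((dpl & 3) << 5) | 0x0e;
	}

The reason the gdt/idt handling moves around at all: both tables now
live at the kernel's high virtual address, which a freshly started AP
(real mode, paging off) cannot reach.  copy_trampoline() therefore
copies both tables next to the trampoline below 1MB and rewrites the
descriptor base addresses through ap_lin_addr().
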
diff --git a/include/arch/i386/include/archtypes.h b/include/arch/i386/include/archtypes.h
index 62bc5aaae..06263f6a5 100644
--- a/include/arch/i386/include/archtypes.h
+++ b/include/arch/i386/include/archtypes.h
@@ -16,6 +16,14 @@ struct segdesc_s {	/* segment descriptor for protected mode */
 	u8_t base_high;
 } __attribute__((packed));
 
+struct gatedesc_s {
+	u16_t offset_low;
+	u16_t selector;
+	u8_t pad;		/* |000|XXXXX| ig & trpg, |XXXXXXXX| task g */
+	u8_t p_dpl_type;	/* |P|DL|0|TYPE| */
+	u16_t offset_high;
+} __attribute__((packed));
+
 struct desctableptr_s {
 	u16_t limit;
 	u32_t base;
diff --git a/kernel/Makefile b/kernel/Makefile
index b46a65efd..5801310e6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ PROG=	kernel
 SRCS+=	clock.c cpulocals.c interrupt.c main.c proc.c system.c \
 	table.c utility.c
 
-LINKERSCRIPT=${.CURDIR}/arch/${ARCH}/kernel.lds
+LINKERSCRIPT=${.CURDIR}/arch/${MACHINE_ARCH}/kernel.lds
 
 DPADD+=	${LIBTIMERS} ${LIBSYS} ${LIBEXEC} $(LINKERSCRIPT)
 LDADD+=	-ltimers -lsys -lexec
diff --git a/kernel/arch/i386/acpi.c b/kernel/arch/i386/acpi.c
index 519410f1f..225467ba2 100644
--- a/kernel/arch/i386/acpi.c
+++ b/kernel/arch/i386/acpi.c
@@ -39,6 +39,24 @@ static int acpi_check_signature(const char * orig, const char * match)
 	return strncmp(orig, match, ACPI_SDT_SIGNATURE_LEN);
 }
 
+static u32_t acpi_phys2vir(u32_t p)
+{
+	if(!vm_running) {
+		printf("acpi: returning 0x%lx as vir addr\n", p);
+		return p;
+	}
+	panic("acpi: can't get virtual address of arbitrary physical address");
+}
+
+static int acpi_phys_copy(phys_bytes phys, void *target, size_t len)
+{
+	if(!vm_running) {
+		memcpy(target, (void *) phys, len);
+		return 0;
+	}
+	panic("can't acpi_phys_copy with vm");
+}
+
 static int acpi_read_sdt_at(phys_bytes addr,
 				struct acpi_sdt_header * tb,
 				size_t size,
@@ -172,7 +190,7 @@ static int get_acpi_rsdp(void)
 	/*
 	 * Read 40:0Eh - to find the starting address of the EBDA.
 	 */
-	phys_copy (0x40E, vir2phys(&ebda), sizeof(ebda));
+	acpi_phys_copy (0x40E, &ebda, sizeof(ebda));
 	if (ebda) {
 		ebda <<= 4;
 		if(platform_tbl_ptr(ebda, ebda + 0x400, 16, &acpi_rsdp,
@@ -192,16 +210,10 @@ static int get_acpi_rsdp(void)
 	return 0;
 }
 
-static int acpi_read_kernel(phys_bytes addr, void * buff, size_t size)
-{
-	phys_copy(addr, vir2phys(buff), size);
-	return 0;
-}
-
 void acpi_init(void)
 {
 	int s, i;
-	read_func = acpi_read_kernel;
+	read_func = acpi_phys_copy;
 
 	if (!get_acpi_rsdp()) {
 		printf("WARNING : Cannot configure ACPI\n");
@@ -238,7 +250,7 @@ struct acpi_madt_ioapic * acpi_get_ioapic_next(void)
 
 	if (idx == 0) {
 		madt_hdr = (struct acpi_madt_hdr *)
-			phys2vir(acpi_get_table_base("APIC"));
+			acpi_phys2vir(acpi_get_table_base("APIC"));
 		if (madt_hdr == NULL)
 			return NULL;
 	}
@@ -260,7 +272,7 @@ struct acpi_madt_lapic * acpi_get_lapic_next(void)
 
 	if (idx == 0) {
 		madt_hdr = (struct acpi_madt_hdr *)
-			phys2vir(acpi_get_table_base("APIC"));
+			acpi_phys2vir(acpi_get_table_base("APIC"));
 		if (madt_hdr == NULL)
 			return NULL;
 	}
diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c
index bb342c4b3..5c652c0a9 100644
--- a/kernel/arch/i386/apic.c
+++ b/kernel/arch/i386/apic.c
@@ -365,6 +365,11 @@ void ioapic_disable_all(void)
 
 static void ioapic_disable_irq(unsigned irq)
 {
+	if(!(io_apic_irq[irq].ioa)) {
+		printf("ioapic_disable_irq: no ioa set for irq %d!\n", irq);
+		return;
+	}
+
 	assert(io_apic_irq[irq].ioa);
 
 	ioapic_disable_pin(io_apic_irq[irq].ioa->addr, io_apic_irq[irq].pin);
@@ -373,6 +378,11 @@ static void ioapic_disable_irq(unsigned irq)
 
 static void ioapic_enable_irq(unsigned irq)
 {
+	if(!(io_apic_irq[irq].ioa)) {
+		printf("ioapic_enable_irq: no ioa set for irq %d!\n", irq);
+		return;
+	}
+
 	assert(io_apic_irq[irq].ioa);
 
 	ioapic_enable_pin(io_apic_irq[irq].ioa->addr, io_apic_irq[irq].pin);
@@ -915,16 +925,17 @@ static int acpi_get_ioapics(struct io_apic * ioa, unsigned * nioa, unsigned max)
 		if (acpi_ioa == NULL)
 			break;
 
+		assert(acpi_ioa->address);
+
 		ioa[n].id = acpi_ioa->id;
 		ioa[n].addr = acpi_ioa->address;
 		ioa[n].paddr = (phys_bytes) acpi_ioa->address;
 		ioa[n].gsi_base = acpi_ioa->global_int_base;
 		ioa[n].pins = ((ioapic_read(ioa[n].addr,
 				IOAPIC_VERSION) & 0xff0000) >> 16)+1;
-		printf("IO APIC %d addr 0x%lx paddr 0x%lx pins %d\n",
-				acpi_ioa->id, ioa[n].addr, ioa[n].paddr,
+		printf("IO APIC idx %d id %d addr 0x%lx paddr 0x%lx pins %d\n",
+				n, acpi_ioa->id, ioa[n].addr, ioa[n].paddr,
 				ioa[n].pins);
-
 		n++;
 	}
 
diff --git a/kernel/arch/i386/arch_clock.c b/kernel/arch/i386/arch_clock.c
index 07a006e8d..692a95d92 100644
--- a/kernel/arch/i386/arch_clock.c
+++ b/kernel/arch/i386/arch_clock.c
@@ -206,6 +206,7 @@ void context_stop(struct proc * p)
 	u64_t * __tsc_ctr_switch = get_cpulocal_var_ptr(tsc_ctr_switch);
 #ifdef CONFIG_SMP
 	unsigned cpu = cpuid;
+	int must_bkl_unlock = 0;
 
 	/*
 	 * This function is called only if we switch from kernel to user or idle
@@ -222,7 +223,7 @@ void context_stop(struct proc * p)
 		tmp = sub64(tsc, *__tsc_ctr_switch);
 		kernel_ticks[cpu] = add64(kernel_ticks[cpu], tmp);
 		p->p_cycles = add64(p->p_cycles, tmp);
-		BKL_UNLOCK();
+		must_bkl_unlock = 1;
 	} else {
 		u64_t bkl_tsc;
 		atomic_t succ;
@@ -295,6 +296,12 @@ void context_stop(struct proc * p)
 	}
 
 	*__tsc_ctr_switch = tsc;
+
+#ifdef CONFIG_SMP
+	if(must_bkl_unlock) {
+		BKL_UNLOCK();
+	}
+#endif
 }
 
 void context_stop_idle(void)
diff --git a/kernel/arch/i386/arch_smp.c b/kernel/arch/i386/arch_smp.c
index 83a3b0b7e..25c536883 100644
--- a/kernel/arch/i386/arch_smp.c
+++ b/kernel/arch/i386/arch_smp.c
@@ -11,7 +11,10 @@
"arch_proto.h" #include "kernel/glo.h" #include +#include #include +#include +#include #include #include #include @@ -31,8 +34,9 @@ void trampoline(void); * They have to be in location which is reachable using absolute addressing in * 16-bit mode */ -extern volatile u32_t __ap_id; +extern volatile u32_t __ap_id, __ap_pt; extern volatile struct desctableptr_s __ap_gdt, __ap_idt; +extern u32_t __ap_gdt_tab, __ap_idt_tab; extern void * __trampoline_end; extern u32_t busclock[CONFIG_MAX_CPUS]; @@ -50,81 +54,84 @@ SPINLOCK_DEFINE(dispq_lock) static void smp_reinit_vars(void); +/* These are initialized in protect.c */ +extern struct segdesc_s gdt[GDT_SIZE]; +extern struct gatedesc_s idt[IDT_SIZE]; +extern struct tss_s tss[CONFIG_MAX_CPUS]; +extern int prot_init_done; /* Indicates they are ready */ + +static phys_bytes trampoline_base; + +static u32_t ap_lin_addr(void *vaddr) +{ + assert(trampoline_base); + return (u32_t) vaddr - (u32_t) &trampoline + trampoline_base; +} + /* * copies the 16-bit AP trampoline code to the first 1M of memory */ -static phys_bytes copy_trampoline(void) +void copy_trampoline(void) { char * s, *end; - phys_bytes tramp_base = 0; - unsigned tramp_size; + unsigned tramp_size, tramp_start = (unsigned)&trampoline;; - tramp_size = (unsigned) &__trampoline_end - (unsigned)&trampoline; - s = env_get("memory"); - if (!s) - return 0; + /* The trampoline code/data is made to be page-aligned. */ + assert(!(tramp_start % I386_PAGE_SIZE)); - while (*s != 0) { - phys_bytes base = 0xfffffff; - unsigned size; - /* Read fresh base and expect colon as next char. */ - base = strtoul(s, &end, 0x10); /* get number */ - if (end != s && *end == ':') - s = ++end; /* skip ':' */ - else - *s=0; + tramp_size = (unsigned) &__trampoline_end - tramp_start; + trampoline_base = alloc_lowest(&kinfo, tramp_size); - /* Read fresh size and expect comma or assume end. */ - size = strtoul(s, &end, 0x10); /* get number */ - if (end != s && *end == ',') - s = ++end; /* skip ',' */ + /* The memory allocator finds the lowest available memory.. + * Verify it's low enough + */ + assert(trampoline_base + tramp_size < (1 << 20)); - tramp_base = (base + 0xfff) & ~(0xfff); - /* the address must be less than 1M */ - if (tramp_base >= (1 << 20)) - continue; - if (size - (tramp_base - base) < tramp_size) - continue; - break; - } + /* prepare gdt and idt for the new cpus; make copies + * of both the tables and the descriptors of them + * in their boot addressing environment. + */ + assert(prot_init_done); + memcpy(&__ap_gdt_tab, gdt, sizeof(gdt)); + memcpy(&__ap_idt_tab, gdt, sizeof(idt)); + __ap_gdt.base = ap_lin_addr(&__ap_gdt_tab); + __ap_gdt.limit = sizeof(gdt)-1; + __ap_idt.base = ap_lin_addr(&__ap_idt_tab); + __ap_idt.limit = sizeof(idt)-1; - phys_copy(vir2phys(trampoline), tramp_base, tramp_size); - - return tramp_base; + phys_copy(trampoline, trampoline_base, tramp_size); } -extern struct desctableptr_s gdt_desc, idt_desc; +extern int booting_cpu; /* tell protect.c what to do */ static void smp_start_aps(void) { - /* - * Find an address and align it to a 4k boundary. 
- */ unsigned cpu; - u32_t biosresetvector; - phys_bytes trampoline_base, __ap_id_phys; + u32_t biosresetvector, *newptpos; + phys_bytes __ap_id_phys; + struct proc *bootstrap_pt = get_cpulocal_var(ptproc); /* TODO hack around the alignment problem */ - phys_copy (0x467, vir2phys(&biosresetvector), sizeof(u32_t)); + phys_copy (0x467, &biosresetvector, sizeof(u32_t)); /* set the bios shutdown code to 0xA */ outb(RTC_INDEX, 0xF); outb(RTC_IO, 0xA); - /* prepare gdt and idt for the new cpus */ - __ap_gdt = gdt_desc; - __ap_idt = idt_desc; + assert(bootstrap_pt); + assert(bootstrap_pt->p_seg.p_cr3); + __ap_pt = bootstrap_pt->p_seg.p_cr3; + assert(__ap_pt); - if (!(trampoline_base = copy_trampoline())) { - printf("Copying trampoline code failed, cannot boot SMP\n"); - ncpus = 1; - } + copy_trampoline(); + + /* New locations for cpu id, pagetable root */ __ap_id_phys = trampoline_base + (phys_bytes) &__ap_id - (phys_bytes)&trampoline; /* setup the warm reset vector */ - phys_copy(vir2phys(&trampoline_base), 0x467, sizeof(u32_t)); + phys_copy(&trampoline_base, 0x467, sizeof(u32_t)); /* okay, we're ready to go. boot all of the ap's now. we loop through * using the processor's apic id values. @@ -137,9 +144,8 @@ static void smp_start_aps(void) continue; } - __ap_id = cpu; - phys_copy(vir2phys((void *) &__ap_id), - __ap_id_phys, sizeof(__ap_id)); + __ap_id = booting_cpu = cpu; + phys_copy((void *) &__ap_id, __ap_id_phys, sizeof(__ap_id)); mfence(); if (apic_send_init_ipi(cpu, trampoline_base) || apic_send_startup_ipi(cpu, trampoline_base)) { @@ -161,7 +167,7 @@ static void smp_start_aps(void) } } - phys_copy(vir2phys(&biosresetvector),(phys_bytes)0x467,sizeof(u32_t)); + phys_copy(&biosresetvector,(phys_bytes)0x467,sizeof(u32_t)); outb(RTC_INDEX, 0xF); outb(RTC_IO, 0); @@ -219,9 +225,6 @@ static void ap_finish_booting(void) /* inform the world of our presence. */ ap_cpu_ready = cpu; - while(!bootstrap_pagetable_done) - arch_pause(); - /* * Finish processor initialisation. CPUs must be excluded from running. * lapic timer calibration locks and unlocks the BKL because of the @@ -231,14 +234,7 @@ static void ap_finish_booting(void) spinlock_lock(&boot_lock); BKL_LOCK(); - /* - * we must load some page tables befre we turn paging on. 
-	 * always present we use those
-	 */
-	pg_load();	/* load bootstrap pagetable built by BSP */
-	vm_enable_paging();
-
-	printf("CPU %d paging is on\n", cpu);
+	printf("CPU %d is up\n", cpu);
 
 	cpu_identify();
 
diff --git a/kernel/arch/i386/include/arch_proto.h b/kernel/arch/i386/include/arch_proto.h
index d6519151a..3693ac460 100644
--- a/kernel/arch/i386/include/arch_proto.h
+++ b/kernel/arch/i386/include/arch_proto.h
@@ -161,12 +161,13 @@ u32_t read_ds(void);
 u32_t read_ss(void);
 
 void add_memmap(kinfo_t *cbi, u64_t addr, u64_t len);
+phys_bytes alloc_lowest(kinfo_t *cbi, phys_bytes len);
 void vm_enable_paging(void);
 void cut_memmap(kinfo_t *cbi, phys_bytes start, phys_bytes end);
 phys_bytes pg_roundup(phys_bytes b);
 void pg_info(reg_t *, u32_t **);
 void pg_clear(void);
-void pg_identity(void);
+void pg_identity(kinfo_t *);
 phys_bytes pg_load(void);
 void pg_map(phys_bytes phys, vir_bytes vaddr, vir_bytes vaddr_end, kinfo_t *cbi);
 int pg_mapkernel(void);
diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h
index 123bc4768..6ae9de784 100644
--- a/kernel/arch/i386/include/archconst.h
+++ b/kernel/arch/i386/include/archconst.h
@@ -28,6 +28,8 @@
 #define LDT_SELECTOR SEG_SELECTOR(LDT_INDEX)
 #define TSS_SELECTOR(cpu) SEG_SELECTOR(TSS_INDEX(cpu))
 
+#define DESC_SIZE 8
+
 /* Privileges. */
 #define INTR_PRIVILEGE	0	/* kernel and interrupt handlers */
 #define USER_PRIVILEGE	3	/* servers and user processes */
diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c
index c78077601..e87628103 100644
--- a/kernel/arch/i386/memory.c
+++ b/kernel/arch/i386/memory.c
@@ -164,11 +164,11 @@ static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
 #ifdef CONFIG_SMP
 	unsigned cpu = cpuid;
 
-	if (GET_BIT(srcproc->p_stale_tlb, cpu)) {
+	if (srcproc && GET_BIT(srcproc->p_stale_tlb, cpu)) {
 		changed = 1;
 		UNSET_BIT(srcproc->p_stale_tlb, cpu);
 	}
-	if (GET_BIT(dstproc->p_stale_tlb, cpu)) {
+	if (dstproc && GET_BIT(dstproc->p_stale_tlb, cpu)) {
 		changed = 1;
 		UNSET_BIT(dstproc->p_stale_tlb, cpu);
 	}
@@ -815,10 +815,13 @@ int arch_phys_map(const int index,
 		*flags = VMMF_UNCACHED;
 		return OK;
 	}
-	else if (ioapic_enabled && index <= ioapic_last_index) {
-		*addr = io_apic[index - 1].paddr;
+	else if (ioapic_enabled && index >= ioapic_first_index && index <= ioapic_last_index) {
+		int ioapic_idx = index - ioapic_first_index;
+		*addr = io_apic[ioapic_idx].paddr;
+		assert(*addr);
 		*len = 4 << 10 /* 4kB */;
 		*flags = VMMF_UNCACHED;
+		printf("ioapic map: addr 0x%lx\n", *addr);
 		return OK;
 	}
 #endif
diff --git a/kernel/arch/i386/mpx.S b/kernel/arch/i386/mpx.S
index 830a99415..fa39e74a6 100644
--- a/kernel/arch/i386/mpx.S
+++ b/kernel/arch/i386/mpx.S
@@ -479,27 +479,22 @@ ENTRY(startup_ap_32)
 	/*
 	 * we are in protected mode now, %cs is correct and we need to set the
 	 * data descriptors before we can touch anything
+	 *
+	 * first load the regular, highly mapped idt, gdt
 	 */
-	movw	$KERN_DS_SELECTOR, %ax
-	mov	%ax, %ds
-	mov	%ax, %ss
-	mov	%ax, %es
-	movw	$0, %ax
-	mov	%ax, %fs
-	mov	%ax, %gs
-
-	/* load TSS for this cpu which was prepared by BSP */
-	movl	_C_LABEL(__ap_id), %ecx
-	shl	$3, %cx
-	mov	$TSS_SELECTOR(0), %eax
-	add	%cx, %ax
-	ltr	%ax
-
 	/*
 	 * use the boot stack for now. The running CPUs are already using their
 	 * own stack, the rest is still waiting to be booted
 	 */
+	movw	$KERN_DS_SELECTOR, %ax
+	mov	%ax, %ds
+	mov	%ax, %ss
 	mov	$_C_LABEL(k_boot_stktop) - 4, %esp
+
+	/* load the highly mapped idt, gdt, per-cpu tss */
+	call	_C_LABEL(prot_load_selectors)
+
 	jmp	_C_LABEL(smp_ap_boot)
 	hlt
 #endif
diff --git a/kernel/arch/i386/pg_utils.c b/kernel/arch/i386/pg_utils.c
index 270250001..f096369a0 100644
--- a/kernel/arch/i386/pg_utils.c
+++ b/kernel/arch/i386/pg_utils.c
@@ -22,6 +22,17 @@ static phys_bytes kern_kernlen = (phys_bytes) &_kern_size;
 /* page directory we can use to map things */
 static u32_t pagedir[1024] __aligned(4096);
 
+void print_memmap(kinfo_t *cbi)
+{
+	int m;
+	assert(cbi->mmap_size < MAXMEMMAP);
+	for(m = 0; m < cbi->mmap_size; m++) {
+		phys_bytes addr = cbi->memmap[m].addr, endit = cbi->memmap[m].addr + cbi->memmap[m].len;
+		printf("%08lx-%08lx ", addr, endit);
+	}
+	printf("\nsize %08lx\n", cbi->mmap_size);
+}
+
 void cut_memmap(kinfo_t *cbi, phys_bytes start, phys_bytes end)
 {
 	int m;
@@ -32,6 +43,8 @@ void cut_memmap(kinfo_t *cbi, phys_bytes start, phys_bytes end)
 	if((o=end % I386_PAGE_SIZE))
 		end += I386_PAGE_SIZE - o;
 
+	assert(kernel_may_alloc);
+
 	for(m = 0; m < cbi->mmap_size; m++) {
 		phys_bytes substart = start, subend = end;
 		phys_bytes memaddr = cbi->memmap[m].addr,
@@ -53,10 +66,29 @@ void cut_memmap(kinfo_t *cbi, phys_bytes start, phys_bytes end)
 	}
 }
 
+phys_bytes alloc_lowest(kinfo_t *cbi, phys_bytes len)
+{
+	/* Allocate the lowest physical page we have. */
+	int m;
+#define EMPTY 0xffffffff
+	phys_bytes lowest = EMPTY;
+	assert(len > 0);
+	len = roundup(len, I386_PAGE_SIZE);
+
+	assert(kernel_may_alloc);
+
+	for(m = 0; m < cbi->mmap_size; m++) {
+		if(cbi->memmap[m].len < len) continue;
+		if(cbi->memmap[m].addr < lowest) lowest = cbi->memmap[m].addr;
+	}
+	assert(lowest != EMPTY);
+	cut_memmap(cbi, lowest, lowest + len);
+	return lowest;
+}
+
 void add_memmap(kinfo_t *cbi, u64_t addr, u64_t len)
 {
 	int m;
-	phys_bytes highmark;
 #define LIMIT 0xFFFFF000
 	/* Truncate available memory at 4GB as the rest of minix
 	 * currently can't deal with any bigger.
@@ -69,20 +101,25 @@ void add_memmap(kinfo_t *cbi, u64_t addr, u64_t len)
 	if(len == 0) return;
 	addr = roundup(addr, I386_PAGE_SIZE);
 	len = rounddown(len, I386_PAGE_SIZE);
+
+	assert(kernel_may_alloc);
+
 	for(m = 0; m < MAXMEMMAP; m++) {
+		phys_bytes highmark;
 		if(cbi->memmap[m].len) continue;
 		cbi->memmap[m].addr = addr;
 		cbi->memmap[m].len = len;
 		cbi->memmap[m].type = MULTIBOOT_MEMORY_AVAILABLE;
 		if(m >= cbi->mmap_size)
 			cbi->mmap_size = m+1;
+		highmark = addr + len;
+		if(highmark > cbi->mem_high_phys) {
+			cbi->mem_high_phys = highmark;
+		}
+
 		return;
 	}
 
-	highmark = addr + len;
-	if(highmark > cbi->mem_high_phys)
-		cbi->mem_high_phys = highmark;
-
 	panic("no available memmap slot");
 }
 
@@ -105,6 +142,9 @@ phys_bytes pg_alloc_page(kinfo_t *cbi)
 {
 	int m;
 	multiboot_memory_map_t *mmap;
+
+	assert(kernel_may_alloc);
+
 	for(m = cbi->mmap_size-1; m >= 0; m--) {
 		mmap = &cbi->memmap[m];
 		if(!mmap->len) continue;
@@ -120,16 +160,26 @@ phys_bytes pg_alloc_page(kinfo_t *cbi)
 	panic("can't find free memory");
 }
 
-void pg_identity(void)
+void pg_identity(kinfo_t *cbi)
 {
 	int i;
 	phys_bytes phys;
 
+	/* We map address space that does not correspond to physical
+	 * memory as non-cacheable. Make sure we know where RAM ends.
+ */ + assert(cbi->mem_high_phys); + /* Set up an identity mapping page directory */ for(i = 0; i < I386_VM_DIR_ENTRIES; i++) { + u32_t flags = I386_VM_PRESENT | I386_VM_BIGPAGE | + I386_VM_USER | I386_VM_WRITE; + if((cbi->mem_high_phys & I386_VM_ADDR_MASK_4MB) + <= (phys & I386_VM_ADDR_MASK_4MB)) { + flags |= I386_VM_PWT | I386_VM_PCD; + } phys = i * I386_BIG_PAGE_SIZE; - pagedir[i] = phys | I386_VM_PRESENT | I386_VM_BIGPAGE | - I386_VM_USER | I386_VM_WRITE; + pagedir[i] = phys | flags; } } @@ -216,6 +266,8 @@ void pg_map(phys_bytes phys, vir_bytes vaddr, vir_bytes vaddr_end, static u32_t *pt = NULL; int pde, pte; + assert(kernel_may_alloc); + if(phys == PG_ALLOCATEME) { assert(!(vaddr % I386_PAGE_SIZE)); } else { diff --git a/kernel/arch/i386/pre_init.c b/kernel/arch/i386/pre_init.c index 53aac5b66..66e63efe0 100644 --- a/kernel/arch/i386/pre_init.c +++ b/kernel/arch/i386/pre_init.c @@ -37,6 +37,9 @@ char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER; /* String length used for mb_itoa */ #define ITOA_BUFFER_SIZE 20 +/* Kernel may use memory */ +int kernel_may_alloc = 1; + static int mb_set_param(char *bigbuf, char *name, char *value, kinfo_t *cbi) { char *p = bigbuf; @@ -96,16 +99,6 @@ int overlaps(multiboot_module_t *mod, int n, int cmp_mod) return 0; } -void print_memmap(kinfo_t *cbi) -{ - int m; - assert(cbi->mmap_size < MAXMEMMAP); - for(m = 0; m < cbi->mmap_size; m++) { - printf("%08lx-%08lx ",cbi->memmap[m].addr, cbi->memmap[m].addr + cbi->memmap[m].len); - } - printf("\nsize %08lx\n", cbi->mmap_size); -} - void get_parameters(u32_t ebx, kinfo_t *cbi) { multiboot_memory_map_t *mmap; @@ -225,9 +218,6 @@ kinfo_t *pre_init(u32_t magic, u32_t ebx) * Here we find out whether we should do serial output. */ get_parameters(ebx, &kinfo); - - /* Say hello. */ - printf("MINIX loading\n"); assert(magic == MULTIBOOT_BOOTLOADER_MAGIC); @@ -236,7 +226,7 @@ kinfo_t *pre_init(u32_t magic, u32_t ebx) * this code stays where it should be. */ pg_clear(); - pg_identity(); + pg_identity(&kinfo); kinfo.freepde_start = pg_mapkernel(); pg_load(); vm_enable_paging(); diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index 8efa837f0..6627cc329 100644 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -21,19 +21,13 @@ /* This is OK initially, when the 1:1 mapping is still there. */ char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER; -struct gatedesc_s { - u16_t offset_low; - u16_t selector; - u8_t pad; /* |000|XXXXX| ig & trpg, |XXXXXXXX| task g */ - u8_t p_dpl_type; /* |P|DL|0|TYPE| */ - u16_t offset_high; -} __attribute__((packed)); - /* Storage for gdt, idt and tss. */ -static struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE); +struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE); struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE); struct tss_s tss[CONFIG_MAX_CPUS]; +int prot_init_done = 0; + phys_bytes vir2phys(void *vir) { extern char _kern_vir_base, _kern_phys_base; /* in kernel.lds */ @@ -263,12 +257,33 @@ multiboot_module_t *bootmod(int pnr) panic("boot module %d not found", pnr); } +int booting_cpu = 0; + +void prot_load_selectors(void) +{ + /* this function is called by both prot_init by the BSP and + * the early AP booting code in mpx.S by secondary CPU's. + * everything is set up the same except for the TSS that is per-CPU. 
+ */ + x86_lgdt(&gdt_desc); /* Load gdt */ + idt_init(); + idt_reload(); + x86_lldt(LDT_SELECTOR); /* Load bogus ldt */ + x86_ltr(TSS_SELECTOR(booting_cpu)); + + x86_load_kerncs(); + x86_load_ds(KERN_DS_SELECTOR); + x86_load_es(KERN_DS_SELECTOR); + x86_load_fs(KERN_DS_SELECTOR); + x86_load_gs(KERN_DS_SELECTOR); + x86_load_ss(KERN_DS_SELECTOR); +} + /*===========================================================================* * prot_init * *===========================================================================*/ void prot_init() { - int sel_tss; extern char k_boot_stktop; memset(gdt, 0, sizeof(gdt)); @@ -279,7 +294,7 @@ void prot_init() gdt_desc.limit = sizeof(gdt)-1; idt_desc.base = (u32_t) idt; idt_desc.limit = sizeof(idt)-1; - sel_tss = tss_init(0, &k_boot_stktop); + tss_init(0, &k_boot_stktop); /* Build GDT */ init_param_dataseg(&gdt[LDT_INDEX], @@ -290,22 +305,11 @@ void prot_init() init_codeseg(USER_CS_INDEX, USER_PRIVILEGE); init_dataseg(USER_DS_INDEX, USER_PRIVILEGE); - x86_lgdt(&gdt_desc); /* Load gdt */ - idt_init(); - idt_reload(); - x86_lldt(LDT_SELECTOR); /* Load bogus ldt */ - x86_ltr(sel_tss); /* Load global TSS */ - /* Currently the multiboot segments are loaded; which is fine, but * let's replace them with the ones from our own GDT so we test * right away whether they work as expected. */ - x86_load_kerncs(); - x86_load_ds(KERN_DS_SELECTOR); - x86_load_es(KERN_DS_SELECTOR); - x86_load_fs(KERN_DS_SELECTOR); - x86_load_gs(KERN_DS_SELECTOR); - x86_load_ss(KERN_DS_SELECTOR); + prot_load_selectors(); /* Set up a new post-relocate bootstrap pagetable so that * we can map in VM, and we no longer rely on pre-relocated @@ -313,10 +317,11 @@ void prot_init() */ pg_clear(); - pg_identity(); /* Still need 1:1 for lapic and video mem and such. */ + pg_identity(&kinfo); /* Still need 1:1 for lapic and video mem and such. 
 	pg_mapkernel();
 	pg_load();
-	bootstrap_pagetable_done = 1;	/* secondary CPU's can use it too */
+
+	prot_init_done = 1;
 }
 
 void arch_post_init(void)
diff --git a/kernel/arch/i386/trampoline.S b/kernel/arch/i386/trampoline.S
index a03aedbf8..92e4635cd 100644
--- a/kernel/arch/i386/trampoline.S
+++ b/kernel/arch/i386/trampoline.S
@@ -1,4 +1,5 @@
 #include
+#include
 #include "archconst.h"
 
 .balign 4096
@@ -6,7 +7,7 @@
 .code16
 ENTRY(trampoline)
 	cli
-
+
 	/* %cs has some value and we must use the same for data */
 	mov	%cs, %ax
 	mov	%ax, %ds
@@ -20,13 +21,38 @@ ENTRY(trampoline)
 	orb	$1, %al
 	mov	%eax, %cr0
 
+	/* set page table feature flags: cr4.PSE on, cr4.PGE off */
+	movl	%cr4, %eax
+	orl	$I386_CR4_PSE, %eax	/* Turn on PSE */
+	andl	$~I386_CR4_PGE, %eax	/* Turn off PGE */
+	movl	%eax, %cr4
+
+	/* load boot cr3 and turn PG on so the CPU can see all of memory */
+	movl	_C_LABEL(__ap_pt) - _C_LABEL(trampoline), %eax
+	movl	%eax, %cr3
+	movl	%cr0, %ecx
+	orl	$I386_CR0_PG, %ecx
+	movl	%ecx, %cr0
+
+	/* turn on cr4.PGE after cr0.PG is on */
+	movl	%cr4, %eax
+	orl	$I386_CR4_PGE, %eax
+	movl	%eax, %cr4
+
+	/* jump into the regular, highly mapped kernel */
 	ljmpl	$KERN_CS_SELECTOR, $_C_LABEL(startup_ap_32)
 
 .balign 4
 LABEL(__ap_id)
 .space 4
+LABEL(__ap_pt)
+.space 4
 LABEL(__ap_gdt)
 .space 8
 LABEL(__ap_idt)
 .space 8
+LABEL(__ap_gdt_tab)
+.space GDT_SIZE*DESC_SIZE
+LABEL(__ap_idt_tab)
+.space IDT_SIZE*DESC_SIZE
 LABEL(__trampoline_end)
diff --git a/kernel/glo.h b/kernel/glo.h
index 4bb96da52..52471c819 100644
--- a/kernel/glo.h
+++ b/kernel/glo.h
@@ -59,11 +59,11 @@ EXTERN u64_t cpu_hz[CONFIG_MAX_CPUS];
 #ifdef CONFIG_SMP
 EXTERN int config_no_smp;	/* optionaly turn off SMP */
 #endif
-EXTERN int bootstrap_pagetable_done;
 
 /* VM */
 EXTERN int vm_running;
 EXTERN int catch_pagefaults;
+EXTERN int kernel_may_alloc;
 
 /* Variables that are initialized elsewhere are just extern here. */
 extern struct boot_image image[NR_BOOT_PROCS];	/* system image processes */
diff --git a/kernel/main.c b/kernel/main.c
index 5cf56e1db..7d92ce437 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -107,6 +107,9 @@ void bsp_finish_booting(void)
 	machine.bsp_id = 0;
 #endif
 
+	/* Kernel may no longer use bits of memory as VM will be running soon */
+	kernel_may_alloc = 0;
+
 	switch_to_user();
 	NOT_REACHABLE;
 }
@@ -128,6 +131,9 @@ void kmain(kinfo_t *local_cbi)
 	/* We can talk now */
 	printf("MINIX booting\n");
 
+	/* Kernel may use bits of main memory before VM is started */
+	kernel_may_alloc = 1;
+
 	assert(sizeof(kinfo.boot_procs) == sizeof(image));
 	memcpy(kinfo.boot_procs, image, sizeof(kinfo.boot_procs));
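
One more note, on the pg_identity() change: the bootstrap page
directory now maps 4MB slots that lie at or above the end of RAM as
uncacheable, so that identity-mapped device ranges (lapic, ioapic,
video memory) never go through the cache.  The predicate, as a
standalone sketch (slot_uncacheable() is illustrative, not a kernel
function):

	/* A page-directory slot maps 4MB starting at slot * 4MB.  Any slot
	 * that lies in or above the 4MB region containing the end of RAM
	 * (cbi->mem_high_phys) gets PCD|PWT: cache disabled, write-through.
	 */
	static int slot_uncacheable(kinfo_t *cbi, int slot)
	{
		phys_bytes phys = (phys_bytes) slot * I386_BIG_PAGE_SIZE;
		return (cbi->mem_high_phys & I386_VM_ADDR_MASK_4MB)
			<= (phys & I386_VM_ADDR_MASK_4MB);
	}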