cfd3379bb1
* Removed startup code patches in lib/csu regarding kernel to userland ABI. * Aligned stack layout on NetBSD stack layout. * Generate valid stack pointers instead of offsets by taking into account _minix_kerninfo->kinfo->user_sp. * Refactored stack generation, by moving part of execve in two functions {minix_stack_params(), minix_stack_fill()} and using them in execve(), rs and vm. * Changed load offset of rtld (ld.so) to: execi.args.stack_high - execi.args.stack_size - 0xa00000 which is 10MB below the main executable stack. Change-Id: I839daf3de43321cded44105634102d419cb36cec
457 lines
14 KiB
C
457 lines
14 KiB
C
/* This file contains code for initialization of protected mode, to initialize
|
|
* code and data segment descriptors, and to initialize global descriptors
|
|
* for local descriptors in the process table.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
|
|
#include <minix/cpufeature.h>
|
|
#include <machine/multiboot.h>
|
|
#include "kernel/kernel.h"
|
|
|
|
#include "archconst.h"
|
|
#include "arch_proto.h"
|
|
|
|
#include <sys/exec.h>
|
|
#include <libexec.h>
|
|
|
|
#define INT_GATE_TYPE (INT_286_GATE | DESC_386_BIT)
|
|
#define TSS_TYPE (AVL_286_TSS | DESC_386_BIT)
|
|
|
|
/* This is OK initially, when the 1:1 mapping is still there. */
|
|
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;
|
|
|
|
/* Storage for gdt, idt and tss. */
|
|
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
|
|
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
|
|
struct tss_s tss[CONFIG_MAX_CPUS];
|
|
|
|
u32_t k_percpu_stacks[CONFIG_MAX_CPUS];
|
|
|
|
int prot_init_done = 0;
|
|
|
|
phys_bytes vir2phys(void *vir)
|
|
{
|
|
extern char _kern_vir_base, _kern_phys_base; /* in kernel.lds */
|
|
u32_t offset = (vir_bytes) &_kern_vir_base -
|
|
(vir_bytes) &_kern_phys_base;
|
|
return (phys_bytes)vir - offset;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* enable_iop *
|
|
*===========================================================================*/
|
|
void enable_iop(struct proc *pp)
|
|
{
|
|
/* Allow a user process to use I/O instructions. Change the I/O Permission
|
|
* Level bits in the psw. These specify least-privileged Current Permission
|
|
* Level allowed to execute I/O instructions. Users and servers have CPL 3.
|
|
* You can't have less privilege than that. Kernel has CPL 0, tasks CPL 1.
|
|
*/
|
|
pp->p_reg.psw |= 0x3000;
|
|
}
|
|
|
|
|
|
/*===========================================================================*
|
|
* sdesc *
|
|
*===========================================================================*/
|
|
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
|
|
{
|
|
/* Fill in the size fields (base, limit and granularity) of a descriptor. */
|
|
segdp->base_low = base;
|
|
segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
|
|
segdp->base_high = base >> BASE_HIGH_SHIFT;
|
|
|
|
--size; /* convert to a limit, 0 size means 4G */
|
|
if (size > BYTE_GRAN_MAX) {
|
|
segdp->limit_low = size >> PAGE_GRAN_SHIFT;
|
|
segdp->granularity = GRANULAR | (size >>
|
|
(PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
|
|
} else {
|
|
segdp->limit_low = size;
|
|
segdp->granularity = size >> GRANULARITY_SHIFT;
|
|
}
|
|
segdp->granularity |= DEFAULT; /* means BIG for data seg */
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* init_dataseg *
|
|
*===========================================================================*/
|
|
void init_param_dataseg(register struct segdesc_s *segdp,
|
|
phys_bytes base, vir_bytes size, const int privilege)
|
|
{
|
|
/* Build descriptor for a data segment. */
|
|
sdesc(segdp, base, size);
|
|
segdp->access = (privilege << DPL_SHIFT) | (PRESENT | SEGMENT |
|
|
WRITEABLE | ACCESSED);
|
|
/* EXECUTABLE = 0, EXPAND_DOWN = 0, ACCESSED = 0 */
|
|
}
|
|
|
|
void init_dataseg(int index, const int privilege)
|
|
{
|
|
init_param_dataseg(&gdt[index], 0, 0xFFFFFFFF, privilege);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* init_codeseg *
|
|
*===========================================================================*/
|
|
static void init_codeseg(int index, int privilege)
|
|
{
|
|
/* Build descriptor for a code segment. */
|
|
sdesc(&gdt[index], 0, 0xFFFFFFFF);
|
|
gdt[index].access = (privilege << DPL_SHIFT)
|
|
| (PRESENT | SEGMENT | EXECUTABLE | READABLE);
|
|
/* CONFORMING = 0, ACCESSED = 0 */
|
|
}
|
|
|
|
static struct gate_table_s gate_table_pic[] = {
|
|
{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
|
|
{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
|
|
{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
|
|
{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
|
|
{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
|
|
{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
|
|
{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
|
|
{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
|
|
{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
|
|
{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
|
|
{ hwint10, VECTOR(10), INTR_PRIVILEGE },
|
|
{ hwint11, VECTOR(11), INTR_PRIVILEGE },
|
|
{ hwint12, VECTOR(12), INTR_PRIVILEGE },
|
|
{ hwint13, VECTOR(13), INTR_PRIVILEGE },
|
|
{ hwint14, VECTOR(14), INTR_PRIVILEGE },
|
|
{ hwint15, VECTOR(15), INTR_PRIVILEGE },
|
|
{ NULL, 0, 0}
|
|
};
|
|
|
|
static struct gate_table_s gate_table_exceptions[] = {
|
|
{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
|
|
{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
|
|
{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
|
|
{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
|
|
{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
|
|
{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
|
|
{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
|
|
{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
|
|
{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
|
|
{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
|
|
{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
|
|
{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
|
|
{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
|
|
{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
|
|
{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
|
|
{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
|
|
{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
|
|
{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
|
|
{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
|
|
{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
|
|
{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
|
|
{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
|
|
{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
|
|
{ NULL, 0, 0}
|
|
};
|
|
|
|
int tss_init(unsigned cpu, void * kernel_stack)
|
|
{
|
|
struct tss_s * t = &tss[cpu];
|
|
int index = TSS_INDEX(cpu);
|
|
struct segdesc_s *tssgdt;
|
|
|
|
tssgdt = &gdt[index];
|
|
|
|
init_param_dataseg(tssgdt, (phys_bytes) t,
|
|
sizeof(struct tss_s), INTR_PRIVILEGE);
|
|
tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;
|
|
|
|
/* Build TSS. */
|
|
memset(t, 0, sizeof(*t));
|
|
t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
|
|
t->cs = KERN_CS_SELECTOR;
|
|
t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */
|
|
|
|
/*
|
|
* make space for process pointer and cpu id and point to the first
|
|
* usable word
|
|
*/
|
|
k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
|
|
/*
|
|
* set the cpu id at the top of the stack so we know on which cpu is
|
|
* this stak in use when we trap to kernel
|
|
*/
|
|
*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
|
|
|
|
/* Set up Intel SYSENTER support if available. */
|
|
if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
|
|
ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
|
|
ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
|
|
ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
|
|
}
|
|
|
|
/* Set up AMD SYSCALL support if available. */
|
|
if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
|
|
u32_t msr_lo, msr_hi;
|
|
|
|
/* set SYSCALL ENABLE bit in EFER MSR */
|
|
ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
|
|
msr_lo |= AMD_EFER_SCE;
|
|
ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);
|
|
|
|
/* set STAR register value */
|
|
#define set_star_cpu(forcpu) if(cpu == forcpu) { \
|
|
ia32_msr_write(AMD_MSR_STAR, \
|
|
((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
|
|
(u32_t) ipc_entry_syscall_cpu ## forcpu); }
|
|
set_star_cpu(0);
|
|
set_star_cpu(1);
|
|
set_star_cpu(2);
|
|
set_star_cpu(3);
|
|
set_star_cpu(4);
|
|
set_star_cpu(5);
|
|
set_star_cpu(6);
|
|
set_star_cpu(7);
|
|
assert(CONFIG_MAX_CPUS <= 8);
|
|
}
|
|
|
|
return SEG_SELECTOR(index);
|
|
}
|
|
|
|
phys_bytes init_segdesc(int gdt_index, void *base, int size)
|
|
{
|
|
struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
|
|
dtp->limit = size - 1;
|
|
dtp->base = (phys_bytes) base;
|
|
|
|
return (phys_bytes) dtp;
|
|
}
|
|
|
|
void int_gate(struct gatedesc_s *tab,
|
|
unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
|
|
{
|
|
/* Build descriptor for an interrupt gate. */
|
|
register struct gatedesc_s *idp;
|
|
|
|
idp = &tab[vec_nr];
|
|
idp->offset_low = offset;
|
|
idp->selector = KERN_CS_SELECTOR;
|
|
idp->p_dpl_type = dpl_type;
|
|
idp->offset_high = offset >> OFFSET_HIGH_SHIFT;
|
|
}
|
|
|
|
void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
|
|
{
|
|
int_gate(idt, vec_nr, offset, dpl_type);
|
|
}
|
|
|
|
void idt_copy_vectors(struct gate_table_s * first)
|
|
{
|
|
struct gate_table_s *gtp;
|
|
for (gtp = first; gtp->gate; gtp++) {
|
|
int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
|
|
PRESENT | INT_GATE_TYPE |
|
|
(gtp->privilege << DPL_SHIFT));
|
|
}
|
|
}
|
|
|
|
void idt_copy_vectors_pic(void)
|
|
{
|
|
idt_copy_vectors(gate_table_pic);
|
|
}
|
|
|
|
void idt_init(void)
|
|
{
|
|
idt_copy_vectors_pic();
|
|
idt_copy_vectors(gate_table_exceptions);
|
|
}
|
|
|
|
struct desctableptr_s gdt_desc, idt_desc;
|
|
|
|
void idt_reload(void)
|
|
{
|
|
x86_lidt(&idt_desc);
|
|
}
|
|
|
|
multiboot_module_t *bootmod(int pnr)
|
|
{
|
|
int i;
|
|
|
|
assert(pnr >= 0);
|
|
|
|
/* Search for desired process in boot process
|
|
* list. The first NR_TASKS ones do not correspond
|
|
* to a module, however, so we don't search those.
|
|
*/
|
|
for(i = NR_TASKS; i < NR_BOOT_PROCS; i++) {
|
|
int p;
|
|
p = i - NR_TASKS;
|
|
if(image[i].proc_nr == pnr) {
|
|
assert(p < MULTIBOOT_MAX_MODS);
|
|
assert(p < kinfo.mbi.mods_count);
|
|
return &kinfo.module_list[p];
|
|
}
|
|
}
|
|
|
|
panic("boot module %d not found", pnr);
|
|
}
|
|
|
|
int booting_cpu = 0;
|
|
|
|
void prot_load_selectors(void)
|
|
{
|
|
/* this function is called by both prot_init by the BSP and
|
|
* the early AP booting code in mpx.S by secondary CPU's.
|
|
* everything is set up the same except for the TSS that is per-CPU.
|
|
*/
|
|
x86_lgdt(&gdt_desc); /* Load gdt */
|
|
idt_init();
|
|
idt_reload();
|
|
x86_lldt(LDT_SELECTOR); /* Load bogus ldt */
|
|
x86_ltr(TSS_SELECTOR(booting_cpu));
|
|
|
|
x86_load_kerncs();
|
|
x86_load_ds(KERN_DS_SELECTOR);
|
|
x86_load_es(KERN_DS_SELECTOR);
|
|
x86_load_fs(KERN_DS_SELECTOR);
|
|
x86_load_gs(KERN_DS_SELECTOR);
|
|
x86_load_ss(KERN_DS_SELECTOR);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* prot_init *
|
|
*===========================================================================*/
|
|
void prot_init()
|
|
{
|
|
extern char k_boot_stktop;
|
|
|
|
if(_cpufeature(_CPUF_I386_SYSENTER))
|
|
minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
|
|
if(_cpufeature(_CPUF_I386_SYSCALL))
|
|
minix_feature_flags |= MKF_I386_AMD_SYSCALL;
|
|
|
|
memset(gdt, 0, sizeof(gdt));
|
|
memset(idt, 0, sizeof(idt));
|
|
|
|
/* Build GDT, IDT, IDT descriptors. */
|
|
gdt_desc.base = (u32_t) gdt;
|
|
gdt_desc.limit = sizeof(gdt)-1;
|
|
idt_desc.base = (u32_t) idt;
|
|
idt_desc.limit = sizeof(idt)-1;
|
|
tss_init(0, &k_boot_stktop);
|
|
|
|
/* Build GDT */
|
|
init_param_dataseg(&gdt[LDT_INDEX],
|
|
(phys_bytes) 0, 0, INTR_PRIVILEGE); /* unusable LDT */
|
|
gdt[LDT_INDEX].access = PRESENT | LDT;
|
|
init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE);
|
|
init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE);
|
|
init_codeseg(USER_CS_INDEX, USER_PRIVILEGE);
|
|
init_dataseg(USER_DS_INDEX, USER_PRIVILEGE);
|
|
|
|
/* Currently the multiboot segments are loaded; which is fine, but
|
|
* let's replace them with the ones from our own GDT so we test
|
|
* right away whether they work as expected.
|
|
*/
|
|
prot_load_selectors();
|
|
|
|
/* Set up a new post-relocate bootstrap pagetable so that
|
|
* we can map in VM, and we no longer rely on pre-relocated
|
|
* data.
|
|
*/
|
|
|
|
pg_clear();
|
|
pg_identity(&kinfo); /* Still need 1:1 for lapic and video mem and such. */
|
|
pg_mapkernel();
|
|
pg_load();
|
|
|
|
prot_init_done = 1;
|
|
}
|
|
|
|
static int alloc_for_vm = 0;
|
|
|
|
void arch_post_init(void)
|
|
{
|
|
/* Let memory mapping code know what's going on at bootstrap time */
|
|
struct proc *vm;
|
|
vm = proc_addr(VM_PROC_NR);
|
|
get_cpulocal_var(ptproc) = vm;
|
|
pg_info(&vm->p_seg.p_cr3, &vm->p_seg.p_cr3_v);
|
|
}
|
|
|
|
static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len)
|
|
{
|
|
pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo);
|
|
pg_load();
|
|
memset((char *) vaddr, 0, len);
|
|
alloc_for_vm += len;
|
|
return OK;
|
|
}
|
|
|
|
void arch_boot_proc(struct boot_image *ip, struct proc *rp)
|
|
{
|
|
multiboot_module_t *mod;
|
|
struct ps_strings *psp;
|
|
char *sp;
|
|
|
|
if(rp->p_nr < 0) return;
|
|
|
|
mod = bootmod(rp->p_nr);
|
|
|
|
/* Important special case: we put VM in the bootstrap pagetable
|
|
* so it can run.
|
|
*/
|
|
|
|
if(rp->p_nr == VM_PROC_NR) {
|
|
struct exec_info execi;
|
|
|
|
memset(&execi, 0, sizeof(execi));
|
|
|
|
/* exec parameters */
|
|
execi.stack_high = kinfo.user_sp;
|
|
execi.stack_size = 64 * 1024; /* not too crazy as it must be preallocated */
|
|
execi.proc_e = ip->endpoint;
|
|
execi.hdr = (char *) mod->mod_start; /* phys mem direct */
|
|
execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start;
|
|
strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname));
|
|
execi.frame_len = 0;
|
|
|
|
/* callbacks for use in the kernel */
|
|
execi.copymem = libexec_copy_memcpy;
|
|
execi.clearmem = libexec_clear_memset;
|
|
execi.allocmem_prealloc_junk = libexec_pg_alloc;
|
|
execi.allocmem_prealloc_cleared = libexec_pg_alloc;
|
|
execi.allocmem_ondemand = libexec_pg_alloc;
|
|
execi.clearproc = NULL;
|
|
|
|
/* parse VM ELF binary and alloc/map it into bootstrap pagetable */
|
|
if(libexec_load_elf(&execi) != OK)
|
|
panic("VM loading failed");
|
|
|
|
/* Setup a ps_strings struct on the stack, pointing to the
|
|
* following argv, envp. */
|
|
sp = (char *)execi.stack_high;
|
|
sp -= sizeof(struct ps_strings);
|
|
psp = (struct ps_strings *) sp;
|
|
|
|
/* Take the stack pointer down three words to give startup code
|
|
* something to use as "argc", "argv" and "envp".
|
|
*/
|
|
sp -= (sizeof(void *) + sizeof(void *) + sizeof(int));
|
|
|
|
// linear address space, so it is available.
|
|
psp->ps_argvstr = (char **)(sp + sizeof(int));
|
|
psp->ps_nargvstr = 0;
|
|
psp->ps_envstr = psp->ps_argvstr + sizeof(void *);
|
|
psp->ps_nenvstr = 0;
|
|
|
|
arch_proc_init(rp, execi.pc, (vir_bytes)sp,
|
|
execi.stack_high - sizeof(struct ps_strings),
|
|
ip->proc_name);
|
|
|
|
/* Free VM blob that was just copied into existence. */
|
|
add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start);
|
|
mod->mod_end = mod->mod_start = 0;
|
|
|
|
/* Remember them */
|
|
kinfo.vm_allocated_bytes = alloc_for_vm;
|
|
}
|
|
}
|