diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c index 79ab15860..5f6dd1071 100755 --- a/kernel/arch/i386/exception.c +++ b/kernel/arch/i386/exception.c @@ -16,21 +16,21 @@ extern int vm_copy_in_progress, catch_pagefaults; extern struct proc *vm_copy_from, *vm_copy_to; -void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, - u32_t *old_eipptr, u32_t *old_eaxptr, u32_t pagefaultcr2) +void pagefault( struct proc *pr, + struct exception_frame * frame, + int is_nested) { int s; vir_bytes ph; u32_t pte; int procok = 0, pcok = 0, rangeok = 0; int in_physcopy = 0; - vir_bytes test_eip; - vmassert(old_eipptr); - vmassert(old_eaxptr); + reg_t pagefaultcr2; - vmassert(*old_eipptr == old_eip); - vmassert(old_eipptr != &old_eip); + vmassert(frame); + + pagefaultcr2 = read_cr2(); #if 0 printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n", @@ -41,18 +41,21 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, vmassert(pr->p_seg.p_cr3 == read_cr3()); } - test_eip = k_reenter ? old_eip : pr->p_reg.pc; + in_physcopy = (frame->eip > (vir_bytes) phys_copy) && + (frame->eip < (vir_bytes) phys_copy_fault); - in_physcopy = (test_eip > (vir_bytes) phys_copy) && - (test_eip < (vir_bytes) phys_copy_fault); - - if((k_reenter || iskernelp(pr)) && + if((is_nested || iskernelp(pr)) && catch_pagefaults && in_physcopy) { #if 0 printf("pf caught! addr 0x%lx\n", pagefaultcr2); #endif - *old_eipptr = (u32_t) phys_copy_fault; - *old_eaxptr = pagefaultcr2; + if (is_nested) { + frame->eip = (reg_t) phys_copy_fault_in_kernel; + } + else { + pr->p_reg.pc = (reg_t) phys_copy_fault; + pr->p_reg.retreg = pagefaultcr2; + } return; } @@ -61,19 +64,19 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, * have page faults. VM does have its own page table but also * can't have page faults (because VM has to handle them). */ - if(k_reenter || (pr->p_endpoint <= INIT_PROC_NR && + if(is_nested || (pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) { /* Page fault we can't / don't want to * handle. */ - kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, k_reenter %d\n", + kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n", pr->p_endpoint, pr->p_name, pr->p_reg.pc, - pagefaultcr2, trap_errno, k_reenter); + pagefaultcr2, frame->errcode, is_nested); proc_stacktrace(pr); if(pr->p_endpoint != SYSTEM) { proc_stacktrace(proc_addr(SYSTEM)); } - kprintf("pc of pagefault: 0x%lx\n", test_eip); + kprintf("pc of pagefault: 0x%lx\n", frame->eip); minix_panic("page fault in system process", pr->p_endpoint); return; @@ -90,7 +93,7 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, * handled. 
*/ pr->p_pagefault.pf_virtual = pagefaultcr2; - pr->p_pagefault.pf_flags = trap_errno; + pr->p_pagefault.pf_flags = frame->errcode; pr->p_nextpagefault = pagefaults; pagefaults = pr; @@ -102,16 +105,7 @@ void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, /*===========================================================================* * exception * *===========================================================================*/ -PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags, - old_eipptr, old_eaxptr, pagefaultcr2) -unsigned vec_nr; -u32_t trap_errno; -u32_t old_eip; -U16_t old_cs; -u32_t old_eflags; -u32_t *old_eipptr; -u32_t *old_eaxptr; -u32_t pagefaultcr2; +PUBLIC void exception_handler(int is_nested, struct exception_frame * frame) { /* An exception or unexpected interrupt has occurred. */ @@ -144,41 +138,35 @@ struct proc *t; register struct ex_s *ep; struct proc *saved_proc; - if(k_reenter > 2) { - /* This can't end well. */ - minix_panic("exception: k_reenter too high", k_reenter); - } - /* Save proc_ptr, because it may be changed by debug statements. */ saved_proc = proc_ptr; - ep = &ex_data[vec_nr]; + ep = &ex_data[frame->vector]; - if (vec_nr == 2) { /* spurious NMI on some machines */ + if (frame->vector == 2) { /* spurious NMI on some machines */ kprintf("got spurious NMI\n"); return; } - if(vec_nr == PAGE_FAULT_VECTOR) { - pagefault(old_eip, saved_proc, trap_errno, - old_eipptr, old_eaxptr, pagefaultcr2); + if(frame->vector == PAGE_FAULT_VECTOR) { + pagefault(saved_proc, frame, is_nested); return; } - /* If an exception occurs while running a process, the k_reenter variable - * will be zero. Exceptions in interrupt handlers or system traps will make - * k_reenter larger than zero. + /* If an exception occurs while running a process, the is_nested variable + * will be zero. Exceptions in interrupt handlers or system traps will make + * is_nested non-zero. */ - if (k_reenter == 0 && ! iskernelp(saved_proc)) { + if (is_nested == 0 && ! iskernelp(saved_proc)) { #if 0 { kprintf( "vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n", - vec_nr, (unsigned long)trap_errno, - (unsigned long)old_eip, old_cs, - (unsigned long)old_eflags); - printseg("cs: ", 1, saved_proc, old_cs); + frame->vector, (unsigned long)frame->errcode, + (unsigned long)frame->eip, frame->cs, + (unsigned long)frame->eflags); + printseg("cs: ", 1, saved_proc, frame->cs); printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds); if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) { printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss); @@ -193,13 +181,13 @@ struct proc *t; /* Exception in system code. This is not supposed to happen. */ if (ep->msg == NIL_PTR || machine.processor < ep->minprocessor) - kprintf("\nIntel-reserved exception %d\n", vec_nr); + kprintf("\nIntel-reserved exception %d\n", frame->vector); else kprintf("\n%s\n", ep->msg); - kprintf("k_reenter = %d ", k_reenter); + kprintf("is_nested = %d ", is_nested); kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x\n", - vec_nr, trap_errno, old_eip, old_cs, old_eflags); + frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags); /* TODO should we enable this only when compiled for some debug mode? 
*/ if (saved_proc) { kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); diff --git a/kernel/arch/i386/klib386.S b/kernel/arch/i386/klib386.S index d4c226631..9a0eb5295 100644 --- a/kernel/arch/i386/klib386.S +++ b/kernel/arch/i386/klib386.S @@ -26,6 +26,7 @@ .globl phys_outsb /* likewise byte by byte */ .globl phys_copy /* copy data from anywhere to anywhere in memory */ .globl phys_copy_fault /* phys_copy pagefault */ +.globl phys_copy_fault_in_kernel /* phys_copy pagefault in kernel */ .globl phys_memset /* write pattern anywhere in memory */ .globl mem_rdw /* copy one word from [segment:offset] */ .globl reset /* reset the system */ @@ -33,6 +34,7 @@ .globl level0 /* call a function at level 0 */ .globl read_cpu_flags /* read the cpu flags */ .globl read_cr0 /* read cr0 */ +.globl read_cr2 /* read cr2 */ .globl getcr3val .globl write_cr0 /* write a value in cr0 */ .globl read_cr4 @@ -343,6 +345,13 @@ phys_copy_fault: /* kernel can send us here */ pop %esi ret +phys_copy_fault_in_kernel: /* kernel can send us here */ + pop %es + pop %edi + pop %esi + mov %cr2, %eax + ret + /*===========================================================================*/ /* phys_memset */ /*===========================================================================*/ @@ -436,7 +445,7 @@ idle_task: */ push $halt call level0 /* level0(halt) */ - pop %eax + add $4, %esp jmp idle_task halt: sti @@ -453,18 +462,23 @@ halt: * things that are only possible at the most privileged CPU level. */ level0: - mov 4(%esp), %eax - cmpb $-1, k_reenter + /* check whether we are already running in kernel, the kernel cs + * selector has 3 lower bits zeroed */ + mov %cs, %ax + cmpw $CS_SELECTOR, %ax jne 0f - int $LEVEL0_VECTOR - ret -0: + /* call the function directly as if it was a normal function call */ + mov 4(%esp), %eax call *%eax ret - + /* if not runnig in the kernel yet, trap to kernel */ +0: + mov 4(%esp), %eax + int $LEVEL0_VECTOR + ret /*===========================================================================*/ /* read_flags */ @@ -521,6 +535,14 @@ write_cr0: pop %ebp ret +/*===========================================================================*/ +/* read_cr2 */ +/*===========================================================================*/ +/* PUBLIC reg_t read_cr2(void); */ +read_cr2: + mov %cr2, %eax + ret + /*===========================================================================*/ /* read_cr4 */ /*===========================================================================*/ diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index a875a590b..ccb3f9d1e 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -922,7 +922,6 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ target->p_endpoint, target->p_name); #endif - vmassert(k_reenter == -1); vmassert(proc_ptr->p_endpoint == SYSTEM); vm_suspend(caller, target, lin, bytes, wr, VMSTYPE_KERNELCALL); diff --git a/kernel/arch/i386/mpx386.S b/kernel/arch/i386/mpx386.S index 8536bd813..83c73d76e 100644 --- a/kernel/arch/i386/mpx386.S +++ b/kernel/arch/i386/mpx386.S @@ -1,4 +1,4 @@ -/* +/* * This file, mpx386.s, is included by mpx.s when Minix is compiled for * 32-bit Intel CPUs. The alternative mpx88.s is compiled for 16-bit CPUs. 
* @@ -82,14 +82,9 @@ begbss: */ .globl restart -.globl save .globl reload_cr3 .globl write_cr3 -.globl errexception -.globl exception1 -.globl exception - .globl divide_error .globl single_step_exception .globl nmi @@ -129,8 +124,6 @@ begbss: .globl hwint14 .globl hwint15 -.globl s_call -.globl p_s_call .globl level0_call /* Exported variables. */ @@ -200,7 +193,7 @@ copygdt: mov %ax, %fs mov %ax, %gs mov %ax, %ss - mov $k_stktop, %esp /* set sp to point to the top of kernel stack */ + mov $k_boot_stktop, %esp /* set sp to point to the top of kernel stack */ /* Save boot parameters into these global variables for i386 code */ movl %edx, params_size @@ -242,18 +235,33 @@ csinit: /* interrupt handlers for 386 32-bit protected mode */ /*===========================================================================*/ +#define PIC_IRQ_HANDLER(irq) \ + push $irq ;\ + call irq_handle /* intr_handle(irq_handlers[irq]) */ ;\ + add $4, %esp ; + /*===========================================================================*/ /* hwint00 - 07 */ /*===========================================================================*/ /* Note this is a macro, it just looks like a subroutine. */ -#define hwint_master(irq) \ - call save /* save interrupted process state */;\ - push $irq ;\ - call irq_handle /* irq_handle(irq) */;\ - pop %ecx ;\ - movb $END_OF_INT, %al ;\ - outb $INT_CTL /* reenable master 8259 */;\ - ret /* restart (another) process */ + +#define hwint_master(irq) \ + TEST_INT_IN_KERNEL(4, 0f) ;\ + \ + SAVE_PROCESS_CTX(0) ;\ + movl $0, %ebp /* for stack trace */ ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + jmp restart ;\ + \ +0: \ + pusha ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + popa ;\ + iret ; /* Each of these entry points is an expansion of the hwint_master macro */ .balign 16 @@ -301,14 +309,24 @@ hwint07: /*===========================================================================*/ /* Note this is a macro, it just looks like a subroutine. */ #define hwint_slave(irq) \ - call save /* save interrupted process state */;\ - push $irq ;\ - call irq_handle /* irq_handle(irq) */;\ - pop %ecx ;\ - movb $END_OF_INT, %al ;\ - outb $INT_CTL /* reenable master 8259 */;\ - outb $INT2_CTL /* reenable slave 8259 */;\ - ret /* restart (another) process */ + TEST_INT_IN_KERNEL(4, 0f) ;\ + \ + SAVE_PROCESS_CTX(0) ;\ + movl $0, %ebp /* for stack trace */ ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + outb $INT2_CTL /* reenable slave 8259 */ ;\ + jmp restart ;\ + \ +0: \ + pusha ;\ + PIC_IRQ_HANDLER(irq) ;\ + movb $END_OF_INT, %al ;\ + outb $INT_CTL /* reenable interrupts in master pic */ ;\ + outb $INT2_CTL /* reenable slave 8259 */ ;\ + popa ;\ + iret ; /* Each of these entry points is an expansion of the hwint_slave macro */ .balign 16 @@ -351,229 +369,185 @@ hwint15: /* Interrupt routine for irq 15 */ hwint_slave(15) -/*===========================================================================*/ -/* save */ -/*===========================================================================*/ /* - * Save for protected mode. - * This is much simpler than for 8086 mode, because the stack already points - * into the process table, or has already been switched to the kernel stack. 
+ * syscall is only from a process to kernel */ +.align 16 +.globl syscall_entry +syscall_entry: -.balign 16 -save: - cld /* set direction flag to a known value */ - pushal /* save "general" registers */ - pushw %ds /* save ds */ - pushw %es /* save es */ - pushw %fs /* save fs */ - pushw %gs /* save gs */ - mov %ss, %dx /* ss is kernel data segment */ - mov %dx, %ds /* load rest of kernel segments */ - mov %dx, %es /* kernel does not use fs, gs */ - mov %esp, %eax /* prepare to return */ - incb k_reenter /* from -1 if not reentering */ - jne set_restart1 /* stack is already kernel stack */ - mov $k_stktop, %esp - push $restart /* build return address for int handler */ - xor %ebp, %ebp /* for stacktrace */ - jmp *RETADR-P_STACKBASE(%eax) + SAVE_PROCESS_CTX(0) -.balign 4 -set_restart1: - push $restart1 - jmp *RETADR-P_STACKBASE(%eax) + /* save the pointer to the current process */ + push %ebp -/*===========================================================================*/ -/* _s_call */ -/*===========================================================================*/ -.balign 16 -s_call: -p_s_call: - cld /* set direction flag to a known value */ - sub $4, %esp /* skip RETADR */ - pusha /* save "general" registers */ - pushw %ds - pushw %es - pushw %fs - pushw %gs + /* + * pass the syscall arguments from userspace to the handler. + * SAVE_PROCESS_CTX() does not clobber these registers, they are still + * set as the userspace have set them + */ + push %edx + push %ebx + push %eax + push %ecx - mov %ss, %si /* ss is kernel data segment */ - mov %si, %ds /* load rest of kernel segments */ - mov %si, %es /* kernel does not use fs, gs */ - incb k_reenter /* increment kernel entry count */ - mov %esp, %esi /* assumes P_STACKBASE == 0 */ - mov $k_stktop, %esp - xor %ebp, %ebp /* for stacktrace */ -/* end of inline save */ -/* now set up parameters for sys_call() */ - push %edx /* event set or flags bit map */ - push %ebx /* pointer to user message */ - push %eax /* source / destination */ - push %ecx /* call number (ipc primitive to use) */ + /* for stack trace */ + movl $0, %ebp - call sys_call /* sys_call(call_nr, src_dst, m_ptr, bit_map) */ -/* caller is now explicitly in proc_ptr */ - mov %eax, AXREG(%esi) + call sys_call -/* Fall into code to restart proc/task running. */ + /* restore the current process pointer and save the return value */ + add $4 * 4, %esp + pop %esi + mov %eax, AXREG(%esi) + + jmp restart + + +.align 16 +/* + * called by the exception interrupt vectors. If the exception does not push + * errorcode, we assume that the vector handler pushed 0 instead. Next pushed + * thing is the vector number. From this point on we can continue as if every + * exception pushes an error code + */ +exception_entry: + /* + * check if it is a nested trap by comparing the saved code segment + * descriptor with the kernel CS first + */ + TEST_INT_IN_KERNEL(12, exception_entry_nested) + +exception_entry_from_user: + + cld + + SAVE_PROCESS_CTX(8) + + /* for stack trace clear %ebp */ + movl $0, %ebp + + /* + * push a pointer to the interrupt state pushed by the cpu and the + * vector number pushed by the vector handler just before calling + * exception_entry and call the exception handler. 
+ */ + push %esp + push $0 /* it's not a nested exception */ + call exception_handler + + jmp restart + +exception_entry_nested: + + pusha + mov %esp, %eax + add $(8 * 4), %eax + push %eax + pushl $1 /* it's a nested exception */ + call exception_handler + add $8, %esp + popa + + /* clear the error code and the exception number */ + add $8, %esp + /* resume execution at the point of exception */ + iret /*===========================================================================*/ /* restart */ /*===========================================================================*/ restart: - -/* Restart the current process or the next process if it is set. */ - - cli call schedcheck - movl proc_ptr, %esp /* will assume P_STACKBASE == 0 */ + + /* %eax is set by schedcheck() to the process to run */ + mov %eax, %esp /* will assume P_STACKBASE == 0 */ + lldt P_LDT_SEL(%esp) /* enable process' segment descriptors */ cmpl $0, P_CR3(%esp) jz 0f + + /* + * test if the cr3 is loaded with the current value to avoid unnecessary + * TLB flushes + */ mov P_CR3(%esp), %eax - cmpl loadedcr3, %eax + mov %cr3, %ecx + cmp %ecx, %eax jz 0f mov %eax, %cr3 - mov %eax, loadedcr3 - mov proc_ptr, %eax - mov %eax, ptproc + mov %esp, ptproc movl $0, dirtypde 0: - lea P_STACKTOP(%esp), %eax /* arrange for next interrupt */ - movl %eax, tss+TSS3_S_SP0 /* to save state in process table */ -restart1: - decb k_reenter + popw %gs popw %fs popw %es popw %ds popal - add $4, %esp /* skip return adr */ + add $4, %esp /* skip return adr FIXME unused value */ iret /* continue process */ /*===========================================================================*/ /* exception handlers */ /*===========================================================================*/ + +#define EXCEPTION_ERR_CODE(vector) \ + push $vector ;\ + jmp exception_entry + +#define EXCEPTION_NO_ERR_CODE(vector) \ + pushl $0 ;\ + EXCEPTION_ERR_CODE(vector) + divide_error: - push $DIVIDE_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(DIVIDE_VECTOR) single_step_exception: - push $DEBUG_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(DEBUG_VECTOR) nmi: - push $NMI_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(NMI_VECTOR) breakpoint_exception: - push $BREAKPOINT_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(BREAKPOINT_VECTOR) overflow: - push $OVERFLOW_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(OVERFLOW_VECTOR) bounds_check: - push $BOUNDS_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(BOUNDS_VECTOR) inval_opcode: - push $INVAL_OP_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(INVAL_OP_VECTOR) copr_not_available: - push $COPROC_NOT_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_NOT_VECTOR) double_fault: - push $DOUBLE_FAULT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(DOUBLE_FAULT_VECTOR) copr_seg_overrun: - push $COPROC_SEG_VECTOR - jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_SEG_VECTOR) inval_tss: - push $INVAL_TSS_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(INVAL_TSS_VECTOR) segment_not_present: - push $SEG_NOT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(SEG_NOT_VECTOR) stack_exception: - push $STACK_FAULT_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(STACK_FAULT_VECTOR) general_protection: - push $PROTECTION_VECTOR - jmp errexception + EXCEPTION_ERR_CODE(PROTECTION_VECTOR) page_fault: - push $PAGE_FAULT_VECTOR - push %eax - mov %cr2, %eax - movl %eax, %ss:pagefaultcr2 - pop %eax - jmp errexception + EXCEPTION_ERR_CODE(PAGE_FAULT_VECTOR) copr_error: - push $COPROC_ERR_VECTOR - 
jmp handle_exception + EXCEPTION_NO_ERR_CODE(COPROC_ERR_VECTOR) -/*===========================================================================*/ -/* handle_exception */ -/*===========================================================================*/ -/* This is called for all exceptions which do not push an error code. */ - -.balign 16 -handle_exception: - movl $0, %ss:trap_errno /* clear trap_errno */ - pop %ss:ex_number - jmp exception1 - -/*===========================================================================*/ -/* errexception */ -/*===========================================================================*/ -/* This is called for all exceptions which push an error code. */ - -.balign 16 -errexception: - pop %ss:ex_number - pop %ss:trap_errno -exception1: -/* Common for all exceptions. */ - movl %esp, %ss:old_eax_ptr /* where will eax be saved */ - subl $PCREG-AXREG, %ss:old_eax_ptr /* here */ - - push %eax /* eax is scratch register */ - - mov 0+4(%esp), %eax /* old eip */ - movl %eax, %ss:old_eip - mov %esp, %eax - add $4, %eax - mov %eax, %ss:old_eip_ptr - movzwl 4+4(%esp), %eax /* old cs */ - movl %eax, %ss:old_cs - mov 8+4(%esp), %eax /* old eflags */ - movl %eax, %ss:old_eflags - - pop %eax - call save - push pagefaultcr2 - push old_eax_ptr - push old_eip_ptr - push old_eflags - push old_cs - push old_eip - push trap_errno - push ex_number - call exception /* (ex_number, trap_errno, old_eip, */ - /* old_cs, old_eflags) */ - add $8*4, %esp - ret /*===========================================================================*/ /* write_cr3 */ @@ -583,10 +557,10 @@ write_cr3: push %ebp mov %esp, %ebp mov 8(%ebp), %eax - cmpl loadedcr3, %eax + mov %cr3, %ecx + cmp %ecx, %eax jz 0f mov %eax, %cr3 - mov %eax, loadedcr3 movl $0, dirtypde 0: pop %ebp @@ -597,13 +571,17 @@ write_cr3: /*===========================================================================*/ level0_call: /* - * which level0 function to call was passed here by putting it in eax, so - * we get that from the saved state. + * which level0 function to call was passed here by putting it in %eax */ - call save - movl proc_ptr, %eax - movl AXREG(%eax), %eax - jmp *%eax + SAVE_PROCESS_CTX(0) + /* for stack trace */ + movl $0, %ebp + /* + * the function to call is in %eax, set in userspace. SAVE_PROCESS_CTX() + * does not clobber this register so we can use it straightaway + */ + call *%eax + jmp restart /*===========================================================================*/ @@ -631,16 +609,11 @@ reload_cr3: .short 0x526F /* this must be the first data entry (magic #) */ .bss -k_stack: -.space K_STACK_BYTES /* kernel stack */ -k_stktop: -/* top of kernel stack */ -.lcomm ex_number, 4 -.lcomm trap_errno, 4 -.lcomm old_eip_ptr, 4 -.lcomm old_eax_ptr, 4 -.lcomm old_eip, 4 -.lcomm old_cs, 4 -.lcomm old_eflags, 4 -.lcomm pagefaultcr2, 4 -.lcomm loadedcr3, 4 +/* + * this stack is used temporarily for booting only. 
We switch to a proper kernel + * stack after the first trap to kernel + */ +.globl k_boot_stktop +k_boot_stack: +.space 4096 /* kernel stack */ /* FIXME use macro here */ +k_boot_stktop: /* top of kernel stack */ diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index c114baac2..3660ff52c 100755 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -25,37 +25,6 @@ struct gatedesc_s { u16_t offset_high; }; -struct tss_s { - reg_t backlink; - reg_t sp0; /* stack pointer to use during interrupt */ - reg_t ss0; /* " segment " " " " */ - reg_t sp1; - reg_t ss1; - reg_t sp2; - reg_t ss2; - reg_t cr3; - reg_t ip; - reg_t flags; - reg_t ax; - reg_t cx; - reg_t dx; - reg_t bx; - reg_t sp; - reg_t bp; - reg_t si; - reg_t di; - reg_t es; - reg_t cs; - reg_t ss; - reg_t ds; - reg_t fs; - reg_t gs; - reg_t ldt; - u16_t trap; - u16_t iobase; -/* u8_t iomap[0]; */ -}; - PUBLIC struct segdesc_s gdt[GDT_SIZE]; /* used in klib.s and mpx.s */ PRIVATE struct gatedesc_s idt[IDT_SIZE]; /* zero-init so none present */ PUBLIC struct tss_s tss; /* zero init */ @@ -204,13 +173,7 @@ PUBLIC void prot_init(void) rp->p_seg.p_ldt_sel = ldt_index * DESC_SIZE; } - /* Build main TSS. - * This is used only to record the stack pointer to be used after an - * interrupt. - * The pointer is set up so that an interrupt automatically saves the - * current process's registers ip:cs:f:sp:ss in the correct slots in the - * process table. - */ + /* Build main TSS */ tss.ss0 = DS_SELECTOR; init_dataseg(&gdt[TSS_INDEX], vir2phys(&tss), sizeof(tss), INTR_PRIVILEGE); gdt[TSS_INDEX].access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; @@ -249,7 +212,7 @@ PUBLIC void idt_init(void) { general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE }, { page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE }, { copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE }, - { s_call, SYS386_VECTOR, USER_PRIVILEGE },/* 386 system call */ + { syscall_entry, SYS386_VECTOR, USER_PRIVILEGE },/* 386 system call */ { level0_call, LEVEL0_VECTOR, TASK_PRIVILEGE }, { NULL, 0, 0} }; diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 3e641523d..24cc63f95 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -20,7 +20,6 @@ _PROTOTYPE( void hwint13, (void) ); _PROTOTYPE( void hwint14, (void) ); _PROTOTYPE( void hwint15, (void) ); - /* Exception handlers (real or protected mode), in numerical order. */ void _PROTOTYPE( int00, (void) ), _PROTOTYPE( divide_error, (void) ); void _PROTOTYPE( int01, (void) ), _PROTOTYPE( single_step_exception, (void) ); @@ -41,7 +40,7 @@ void _PROTOTYPE( copr_error, (void) ); /* Software interrupt handlers, in numerical order. 
*/ _PROTOTYPE( void trp, (void) ); -_PROTOTYPE( void s_call, (void) ), _PROTOTYPE( p_s_call, (void) ); +_PROTOTYPE( void syscall_entry, (void) ); _PROTOTYPE( void level0_call, (void) ); /* memory.c */ @@ -51,16 +50,25 @@ _PROTOTYPE( void vm_set_cr3, (struct proc *)); /* exception.c */ -_PROTOTYPE( void exception, (unsigned vec_nr, u32_t trap_errno, - u32_t old_eip, U16_t old_cs, u32_t old_eflags, - u32_t *old_eip_ptr, u32_t *old_eax_ptr, u32_t pagefaultcr2) ); +struct exception_frame { + reg_t vector; /* which interrupt vector was triggered */ + reg_t errcode; /* zero if the exception does not push an error code */ + reg_t eip; + reg_t cs; + reg_t eflags; + reg_t esp; /* undefined if trap is nested */ + reg_t ss; /* undefined if trap is nested */ +}; + +_PROTOTYPE( void exception_handler, (int is_nested, struct exception_frame * frame)); /* klib386.s */ _PROTOTYPE( void level0, (void (*func)(void)) ); _PROTOTYPE( void monitor, (void) ); _PROTOTYPE( void reset, (void) ); _PROTOTYPE( void int86, (void) ); -_PROTOTYPE( unsigned long read_cr0, (void) ); +_PROTOTYPE( reg_t read_cr0, (void) ); +_PROTOTYPE( reg_t read_cr2, (void) ); _PROTOTYPE( void write_cr0, (unsigned long value) ); _PROTOTYPE( unsigned long read_cr4, (void) ); _PROTOTYPE( void write_cr4, (unsigned long value) ); @@ -75,6 +83,39 @@ _PROTOTYPE( void reload_cr3, (void) ); _PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes) ); /* protect.c */ +struct tss_s { + reg_t backlink; + reg_t sp0; /* stack pointer to use during interrupt */ + reg_t ss0; /* " segment " " " " */ + reg_t sp1; + reg_t ss1; + reg_t sp2; + reg_t ss2; + reg_t cr3; + reg_t ip; + reg_t flags; + reg_t ax; + reg_t cx; + reg_t dx; + reg_t bx; + reg_t sp; + reg_t bp; + reg_t si; + reg_t di; + reg_t es; + reg_t cs; + reg_t ss; + reg_t ds; + reg_t fs; + reg_t gs; + reg_t ldt; + u16_t trap; + u16_t iobase; +/* u8_t iomap[0]; */ +}; + +EXTERN struct tss_s tss; + _PROTOTYPE( void prot_init, (void) ); _PROTOTYPE( void idt_init, (void) ); _PROTOTYPE( void init_codeseg, (struct segdesc_s *segdp, phys_bytes base, @@ -100,6 +141,10 @@ EXTERN struct gate_table_s gate_table_pic[]; /* copies an array of vectors to the IDT. The last vector must be zero filled */ _PROTOTYPE(void idt_copy_vectors, (struct gate_table_s * first)); +EXTERN void * k_boot_stktop; +_PROTOTYPE(void tss_init, (struct tss_s * tss, void * kernel_stack, unsigned cpu)); + + /* functions defined in architecture-independent kernel source. */ #include "../../proto.h" diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index 1837fa51a..2dbaa0163 100755 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -1,3 +1,8 @@ +#ifndef __SCONST_H__ +#define __SCONST_H__ + +#include "../../const.h" + /* Miscellaneous constants used in assembler code. */ W = _WORD_SIZE /* Machine word size. */ @@ -26,3 +31,104 @@ P_CR3 = P_LDT_SEL+W P_LDT = P_CR3+W Msize = 9 /* size of a message in 32-bit words*/ + + +/* + * offset to the current process pointer right after a trap; we assume there is + * always an error code on the stack + */ +#define CURR_PROC_PTR 20 + +/* + * tests whether the interrupt was triggered in the kernel. If so, jump to the + * label. Displacement tells the macro how far the CS value saved by the trap is + * from the current %esp.
The kernel code segment selector has the lower 3 bits + * zeroed + */ +#define TEST_INT_IN_KERNEL(displ, label) \ + cmpl $CS_SELECTOR, displ(%esp) ;\ + je label ; + +/* + * saves the basic interrupt context (no error code) to the process structure + * + * displ is the displacement of %esp from the original stack after trap + * pptr is the process structure pointer + * tmp is an available temporary register + */ +#define SAVE_TRAP_CTX(displ, pptr, tmp) \ + movl (0 + displ)(%esp), tmp ;\ + movl tmp, PCREG(pptr) ;\ + movl (4 + displ)(%esp), tmp ;\ + movl tmp, CSREG(pptr) ;\ + movl (8 + displ)(%esp), tmp ;\ + movl tmp, PSWREG(pptr) ;\ + movl (12 + displ)(%esp), tmp ;\ + movl tmp, SPREG(pptr) ;\ + movl tmp, STREG(pptr) ;\ + movl (16 + displ)(%esp), tmp ;\ + movl tmp, SSREG(pptr) ; + +#define SAVE_SEGS(pptr) \ + mov %ds, %ss:DSREG(pptr) ;\ + mov %es, %ss:ESREG(pptr) ;\ + mov %fs, %ss:FSREG(pptr) ;\ + mov %gs, %ss:GSREG(pptr) ; + +#define RESTORE_SEGS(pptr) \ + movw %ss:DSREG(pptr), %ds ;\ + movw %ss:ESREG(pptr), %es ;\ + movw %ss:FSREG(pptr), %fs ;\ + movw %ss:GSREG(pptr), %gs ; + +/* + * restore kernel segments; %ss is the kernel data segment, %cs is already set and + * %fs, %gs are not used + */ +#define RESTORE_KERNEL_SEGS \ + mov %ss, %si ;\ + mov %si, %ds ;\ + mov %si, %es ; + +#define SAVE_GP_REGS(pptr) \ + mov %eax, %ss:AXREG(pptr) ;\ + mov %ecx, %ss:CXREG(pptr) ;\ + mov %edx, %ss:DXREG(pptr) ;\ + mov %ebx, %ss:BXREG(pptr) ;\ + mov %esi, %ss:SIREG(pptr) ;\ + mov %edi, %ss:DIREG(pptr) ; + +#define RESTORE_GP_REGS(pptr) \ + movl %ss:AXREG(pptr), %eax ;\ + movl %ss:CXREG(pptr), %ecx ;\ + movl %ss:DXREG(pptr), %edx ;\ + movl %ss:BXREG(pptr), %ebx ;\ + movl %ss:SIREG(pptr), %esi ;\ + movl %ss:DIREG(pptr), %edi ; + +/* + * save the context of the interrupted process to the structure in the process + * table. It pushes %ebp to the stack to get a scratch register. After %esi is + * saved, we can use it to get the saved %ebp from the stack and store it in the + * final location + * + * displ is the stack displacement.
In case of an exception, there are two extra + * values on the stack - the error code and the exception number + */ +#define SAVE_PROCESS_CTX(displ) \ + push %ebp ;\ + ;\ + movl (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\ + ;\ + /* save the segment registers */ \ + SAVE_SEGS(%ebp) ;\ + \ + SAVE_GP_REGS(%ebp) ;\ + pop %esi /* get the orig %ebp and save it */ ;\ + mov %esi, %ss:BPREG(%ebp) ;\ + \ + RESTORE_KERNEL_SEGS ;\ + SAVE_TRAP_CTX(displ, %ebp, %esi) ;\ + ; + +#endif /* __SCONST_H__ */ diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c index bfdd3f6f1..37cff7033 100644 --- a/kernel/arch/i386/system.c +++ b/kernel/arch/i386/system.c @@ -107,10 +107,28 @@ PUBLIC void arch_get_aout_headers(int i, struct exec *h) phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR); } +PUBLIC void tss_init(struct tss_s * tss, void * kernel_stack, unsigned cpu) +{ + /* + * make space for the process pointer and cpu id and point to the first + * usable word + */ + tss->sp0 = ((unsigned) kernel_stack) - 2 * sizeof(void *); + tss->ss0 = DS_SELECTOR; + + /* + * set the cpu id at the top of the stack so we know on which cpu + * this stack is in use when we trap to the kernel + */ + *((reg_t *)(tss->sp0 + 1 * sizeof(reg_t))) = cpu; +} + PUBLIC void arch_init(void) { idt_init(); + tss_init(&tss, &k_boot_stktop, 0); + #if 0 /* Set CR0_EM until we get FP context switching */ write_cr0(read_cr0() | CR0_EM); @@ -389,3 +407,12 @@ PUBLIC void arch_do_syscall(struct proc *proc) /* Make the system call, for real this time. */ proc->p_reg.retreg = sys_call(call_nr, src_dst_e, m_ptr, bit_map); } + +PUBLIC struct proc * arch_finish_schedcheck(void) +{ + char * stk; + stk = (char *)tss.sp0; + /* set pointer to the process to run on the stack */ + *((reg_t *)stk) = (reg_t) proc_ptr; + return proc_ptr; +} diff --git a/kernel/glo.h b/kernel/glo.h index 25ecf45af..eef660117 100755 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -36,7 +36,6 @@ EXTERN struct proc *bill_ptr; /* process to bill for clock ticks */ EXTERN struct proc *vmrestart; /* first process on vmrestart queue */ EXTERN struct proc *vmrequest; /* first process on vmrequest queue */ EXTERN struct proc *pagefaults; /* first process on pagefault queue */ -EXTERN char k_reenter; /* kernel reentry count (entry count less 1) */ EXTERN unsigned lost_ticks; /* clock ticks counted outside clock task */ diff --git a/kernel/proc.c b/kernel/proc.c index e3bdc05ae..f58a7332d 100755 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -125,7 +125,7 @@ PRIVATE int QueueMess(endpoint_t ep, vir_bytes msg_lin, struct proc *dst) /*===========================================================================* * schedcheck * *===========================================================================*/ -PUBLIC void schedcheck(void) +PUBLIC struct proc * schedcheck(void) { /* This function is called an instant before proc_ptr is * to be scheduled again.
@@ -214,7 +214,10 @@ PUBLIC void schedcheck(void) #if DEBUG_TRACE proc_ptr->p_schedules++; #endif - NOREC_RETURN(schedch, ); + + proc_ptr = arch_finish_schedcheck(); + + NOREC_RETURN(schedch, proc_ptr); } /*===========================================================================* diff --git a/kernel/proto.h b/kernel/proto.h index c44b8d31f..b442fe4da 100755 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -36,7 +36,8 @@ _PROTOTYPE( int lock_send, (int dst, message *m_ptr) ); _PROTOTYPE( void enqueue, (struct proc *rp) ); _PROTOTYPE( void dequeue, (struct proc *rp) ); _PROTOTYPE( void balance_queues, (struct timer *tp) ); -_PROTOTYPE( void schedcheck, (void) ); +_PROTOTYPE( struct proc * schedcheck, (void) ); +_PROTOTYPE( struct proc * arch_finish_schedcheck, (void) ); _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep) ); #if DEBUG_ENABLE_IPC_WARNINGS _PROTOTYPE( int isokendpt_f, (char *file, int line, endpoint_t e, int *p, int f)); @@ -105,6 +106,7 @@ _PROTOTYPE( void stop_profile_clock, (void) ); _PROTOTYPE( phys_bytes phys_copy, (phys_bytes source, phys_bytes dest, phys_bytes count) ); _PROTOTYPE( void phys_copy_fault, (void)); +_PROTOTYPE( void phys_copy_fault_in_kernel, (void)); #define virtual_copy(src, dst, bytes) virtual_copy_f(src, dst, bytes, 0) #define virtual_copy_vmcheck(src, dst, bytes) virtual_copy_f(src, dst, bytes, 1) _PROTOTYPE( int virtual_copy_f, (struct vir_addr *src, struct vir_addr *dst,
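For readers following the new trap path end to end, below is a minimal, hedged sketch (plain user-space C, not kernel code) of how the pieces introduced by this patch fit together: a stub fills a struct exception_frame the way the mpx386.S exception entry does, exception_handler() receives the frame plus the is_nested flag, and the phys_copy window test decides whether a page fault is recoverable. The struct layout and names mirror the patch; the typedefs, the addresses, and the simplified handler body are illustrative assumptions only.

#include <stdio.h>

typedef unsigned long reg_t;      /* assumption: 32-bit register word */
typedef unsigned long vir_bytes;

/* mirrors the layout pushed by the exception stubs and vector macros */
struct exception_frame {
	reg_t vector;   /* vector number pushed by the stub */
	reg_t errcode;  /* 0 if the CPU pushed no error code */
	reg_t eip;
	reg_t cs;
	reg_t eflags;
	reg_t esp;      /* only defined when the trap came from user mode */
	reg_t ss;
};

/* stand-ins for the kernel symbols phys_copy / phys_copy_fault (assumed addresses) */
static vir_bytes phys_copy_start = 0x1000;
static vir_bytes phys_copy_fault = 0x1080;

/* simplified version of the in_physcopy check in pagefault(): is the
 * faulting eip inside the phys_copy window and therefore recoverable? */
static int fault_in_physcopy(vir_bytes eip)
{
	return eip > phys_copy_start && eip < phys_copy_fault;
}

static void exception_handler(int is_nested, struct exception_frame *frame)
{
	if (frame->vector == 14) {  /* PAGE_FAULT_VECTOR on i386 */
		printf("page fault, err=0x%lx, eip=0x%lx, nested=%d, recoverable=%d\n",
		       frame->errcode, frame->eip, is_nested,
		       fault_in_physcopy(frame->eip));
		return;
	}
	printf("exception %lu at eip=0x%lx (nested=%d)\n",
	       frame->vector, frame->eip, is_nested);
}

int main(void)
{
	/* a fault taken inside the phys_copy window, as if while in the kernel */
	struct exception_frame f = { 14, 0x2, 0x1040, 0x8, 0x202, 0, 0 };
	exception_handler(1 /* nested: trapped while already in the kernel */, &f);
	return 0;
}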