diff --git a/kernel/arch/i386/arch_do_vmctl.c b/kernel/arch/i386/arch_do_vmctl.c index 523165d82..2a8d526a2 100644 --- a/kernel/arch/i386/arch_do_vmctl.c +++ b/kernel/arch/i386/arch_do_vmctl.c @@ -10,7 +10,9 @@ #include "../../system.h" #include -extern u32_t kernel_cr3; +#include "proto.h" + +extern u32_t *vm_pagedirs; /*===========================================================================* * arch_do_vmctl * @@ -30,7 +32,7 @@ struct proc *p; p->p_seg.p_cr3 = m_ptr->SVMCTL_VALUE; p->p_misc_flags |= MF_FULLVM; } else { - p->p_seg.p_cr3 = kernel_cr3; + p->p_seg.p_cr3 = 0; p->p_misc_flags &= ~MF_FULLVM; } RTS_LOCK_UNSET(p, VMINHIBIT); @@ -53,8 +55,33 @@ struct proc *p; m_ptr->SVMCTL_PF_I386_ERR = rp->p_pagefault.pf_flags; return OK; } + case VMCTL_I386_KERNELLIMIT: + { + int r; + /* VM wants kernel to increase its segment. */ + r = prot_set_kern_seg_limit(m_ptr->SVMCTL_VALUE); + return r; + } + case VMCTL_I386_PAGEDIRS: + { + int pde; + vm_pagedirs = (u32_t *) m_ptr->SVMCTL_VALUE; + return OK; + } + case VMCTL_I386_FREEPDE: + { + i386_freepde(m_ptr->SVMCTL_VALUE); + return OK; + } + case VMCTL_FLUSHTLB: + { + level0(reload_cr3); + return OK; + } } + + kprintf("arch_do_vmctl: strange param %d\n", m_ptr->SVMCTL_PARAM); return EINVAL; } diff --git a/kernel/arch/i386/do_sdevio.c b/kernel/arch/i386/do_sdevio.c index 7443c132f..63b152aff 100644 --- a/kernel/arch/i386/do_sdevio.c +++ b/kernel/arch/i386/do_sdevio.c @@ -24,6 +24,8 @@ PUBLIC int do_sdevio(m_ptr) register message *m_ptr; /* pointer to request message */ { + vir_bytes newoffset; + endpoint_t newep; int proc_nr, proc_nr_e = m_ptr->DIO_VEC_ENDPT; int count = m_ptr->DIO_VEC_SIZE; long port = m_ptr->DIO_PORT; @@ -32,6 +34,9 @@ register message *m_ptr; /* pointer to request message */ struct proc *rp; struct priv *privp; struct io_range *iorp; + int rem; + vir_bytes addr; + struct proc *destproc; /* Allow safe copies and accesses to SELF */ if ((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) != _DIO_SAFE && @@ -64,11 
+69,23 @@ register message *m_ptr; /* pointer to request message */ /* Check for 'safe' variants. */ if((m_ptr->DIO_REQUEST & _DIO_SAFEMASK) == _DIO_SAFE) { /* Map grant address to physical address. */ - if ((phys_buf = umap_verify_grant(proc_addr(proc_nr), who_e, + if(verify_grant(proc_nr_e, who_e, (vir_bytes) m_ptr->DIO_VEC_ADDR, - (vir_bytes) m_ptr->DIO_OFFSET, count, - req_dir == _DIO_INPUT ? CPF_WRITE : CPF_READ)) == 0) - return(EPERM); + count, + req_dir == _DIO_INPUT ? CPF_WRITE : CPF_READ, + (vir_bytes) m_ptr->DIO_OFFSET, + &newoffset, &newep) != OK) { + printf("do_sdevio: verify_grant failed\n"); + return EPERM; + } + if(!isokendpt(newep, &proc_nr)) + return(EINVAL); + destproc = proc_addr(proc_nr); + if ((phys_buf = umap_local(destproc, D, + (vir_bytes) newoffset, count)) == 0) { + printf("do_sdevio: umap_local failed\n"); + return(EFAULT); + } } else { if(proc_nr != who_p) { @@ -77,10 +94,14 @@ register message *m_ptr; /* pointer to request message */ return EPERM; } /* Get and check physical address. 
*/ - if ((phys_buf = umap_virtual(proc_addr(proc_nr), D, + if ((phys_buf = umap_local(proc_addr(proc_nr), D, (vir_bytes) m_ptr->DIO_VEC_ADDR, count)) == 0) return(EFAULT); + destproc = proc_addr(proc_nr); } + /* current process must be target for phys_* to be OK */ + + vm_set_cr3(destproc); switch (io_type) { diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c index 35fb55440..7e54f2745 100755 --- a/kernel/arch/i386/exception.c +++ b/kernel/arch/i386/exception.c @@ -10,59 +10,87 @@ #include #include #include "../../proc.h" +#include "../../proto.h" +#include "../../vm.h" -extern int vm_copy_in_progress; +extern int vm_copy_in_progress, catch_pagefaults; extern struct proc *vm_copy_from, *vm_copy_to; -extern u32_t vm_copy_from_v, vm_copy_to_v; -extern u32_t vm_copy_from_p, vm_copy_to_p, vm_copy_cr3; -u32_t pagefault_cr2, pagefault_count = 0; - -void pagefault(struct proc *pr, int trap_errno) +void pagefault(vir_bytes old_eip, struct proc *pr, int trap_errno, + u32_t *old_eipptr, u32_t *old_eaxptr, u32_t pagefaultcr2) { int s; vir_bytes ph; u32_t pte; + int procok = 0, pcok = 0, rangeok = 0; + int in_physcopy = 0; + vir_bytes test_eip; - if(pagefault_count != 1) - minix_panic("recursive pagefault", pagefault_count); + vmassert(old_eipptr); + vmassert(old_eaxptr); - /* Don't schedule this process until pagefault is handled. */ - if(RTS_ISSET(pr, PAGEFAULT)) - minix_panic("PAGEFAULT set", pr->p_endpoint); - RTS_LOCK_SET(pr, PAGEFAULT); + vmassert(*old_eipptr == old_eip); + vmassert(old_eipptr != &old_eip); - if(pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) { +#if 0 + printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n", + pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3()); +#endif + + if(pr->p_seg.p_cr3) { + vmassert(pr->p_seg.p_cr3 == read_cr3()); + } + + test_eip = k_reenter ? 
old_eip : pr->p_reg.pc; + + in_physcopy = (test_eip > (vir_bytes) phys_copy) && + (test_eip < (vir_bytes) phys_copy_fault); + + if((k_reenter || iskernelp(pr)) && + catch_pagefaults && in_physcopy) { +#if 0 + printf("pf caught! addr 0x%lx\n", pagefaultcr2); +#endif + *old_eipptr = (u32_t) phys_copy_fault; + *old_eaxptr = pagefaultcr2; + + return; + } + + /* System processes that don't have their own page table can't + * have page faults. VM does have its own page table but also + * can't have page faults (because VM has to handle them). + */ + if(k_reenter || (pr->p_endpoint <= INIT_PROC_NR && + !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) { /* Page fault we can't / don't want to * handle. */ - kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x\n", + kprintf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, k_reenter %d\n", pr->p_endpoint, pr->p_name, pr->p_reg.pc, - pagefault_cr2, trap_errno); + pagefaultcr2, trap_errno, k_reenter); proc_stacktrace(pr); minix_panic("page fault in system process", pr->p_endpoint); return; } + /* Don't schedule this process until pagefault is handled. */ + vmassert(pr->p_seg.p_cr3 == read_cr3()); + vmassert(!RTS_ISSET(pr, PAGEFAULT)); + RTS_LOCK_SET(pr, PAGEFAULT); + /* Save pagefault details, suspend process, * add process to pagefault chain, * and tell VM there is a pagefault to be * handled. 
*/ - pr->p_pagefault.pf_virtual = pagefault_cr2; + pr->p_pagefault.pf_virtual = pagefaultcr2; pr->p_pagefault.pf_flags = trap_errno; pr->p_nextpagefault = pagefaults; pagefaults = pr; - lock_notify(HARDWARE, VM_PROC_NR); - - pagefault_count = 0; - -#if 0 - kprintf("pagefault for process %d ('%s'), pc = 0x%x\n", - pr->p_endpoint, pr->p_name, pr->p_reg.pc); - proc_stacktrace(pr); -#endif + + mini_notify(proc_addr(HARDWARE), VM_PROC_NR); return; } @@ -70,12 +98,16 @@ void pagefault(struct proc *pr, int trap_errno) /*===========================================================================* * exception * *===========================================================================*/ -PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags) +PUBLIC void exception(vec_nr, trap_errno, old_eip, old_cs, old_eflags, + old_eipptr, old_eaxptr, pagefaultcr2) unsigned vec_nr; u32_t trap_errno; u32_t old_eip; U16_t old_cs; u32_t old_eflags; +u32_t *old_eipptr; +u32_t *old_eaxptr; +u32_t pagefaultcr2; { /* An exception or unexpected interrupt has occurred. */ @@ -108,16 +140,14 @@ struct proc *t; register struct ex_s *ep; struct proc *saved_proc; -#if DEBUG_SCHED_CHECK - for (t = BEG_PROC_ADDR; t < END_PROC_ADDR; ++t) { - if(t->p_magic != PMAGIC) - kprintf("entry %d broken\n", t->p_nr); + if(k_reenter > 2) { + /* This can't end well. */ + minix_panic("exception: k_reenter too high", k_reenter); } -#endif /* Save proc_ptr, because it may be changed by debug statements. */ saved_proc = proc_ptr; - + ep = &ex_data[vec_nr]; if (vec_nr == 2) { /* spurious NMI on some machines */ @@ -126,8 +156,9 @@ struct proc *t; } if(vec_nr == PAGE_FAULT_VECTOR) { - pagefault(saved_proc, trap_errno); - return; + pagefault(old_eip, saved_proc, trap_errno, + old_eipptr, old_eaxptr, pagefaultcr2); + return; } /* If an exception occurs while running a process, the k_reenter variable @@ -137,22 +168,19 @@ struct proc *t; if (k_reenter == 0 && ! 
iskernelp(saved_proc)) { { - kprintf( -"exception for process %d, endpoint %d ('%s'), pc = 0x%x:0x%x, sp = 0x%x:0x%x\n", - proc_nr(saved_proc), saved_proc->p_endpoint, - saved_proc->p_name, - saved_proc->p_reg.cs, saved_proc->p_reg.pc, - saved_proc->p_reg.ss, saved_proc->p_reg.sp); kprintf( "vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n", vec_nr, (unsigned long)trap_errno, (unsigned long)old_eip, old_cs, (unsigned long)old_eflags); + printseg("cs: ", 1, saved_proc, old_cs); + printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds); + if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) { + printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss); + } proc_stacktrace(saved_proc); } - kprintf("kernel: cause_sig %d for %d\n", - ep->signum, saved_proc->p_endpoint); cause_sig(proc_nr(saved_proc), ep->signum); return; } @@ -168,7 +196,7 @@ struct proc *t; vec_nr, trap_errno, old_eip, old_cs, old_eflags); /* TODO should we enable this only when compiled for some debug mode? */ if (saved_proc) { - kprintf("process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); + kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); kprintf("pc = %u:0x%x\n", (unsigned) saved_proc->p_reg.cs, (unsigned) saved_proc->p_reg.pc); proc_stacktrace(saved_proc); @@ -184,24 +212,30 @@ struct proc *t; /*===========================================================================* * stacktrace * *===========================================================================*/ -PUBLIC void proc_stacktrace(struct proc *proc) +PUBLIC void proc_stacktrace(struct proc *whichproc) { reg_t bp, v_bp, v_pc, v_hbp; + int iskernel; - v_bp = proc->p_reg.fp; + v_bp = whichproc->p_reg.fp; - kprintf("%8.8s %6d 0x%lx ", - proc->p_name, proc->p_endpoint, proc->p_reg.pc); + iskernel = iskernelp(whichproc); + + kprintf("%-8.8s %6d 0x%lx ", + whichproc->p_name, whichproc->p_endpoint, whichproc->p_reg.pc); while(v_bp) { - if(data_copy(proc->p_endpoint, v_bp, - SYSTEM, 
(vir_bytes) &v_hbp, sizeof(v_hbp)) != OK) { + +#define PRCOPY(pr, pv, v, n) \ + (iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \ + data_copy(pr->p_endpoint, pv, SYSTEM, (vir_bytes) (v), n)) + + if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) { kprintf("(v_bp 0x%lx ?)", v_bp); break; } - if(data_copy(proc->p_endpoint, v_bp + sizeof(v_pc), - SYSTEM, (vir_bytes) &v_pc, sizeof(v_pc)) != OK) { - kprintf("(v_pc 0x%lx ?)", v_pc); + if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) { + kprintf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc)); break; } kprintf("0x%lx ", (unsigned long) v_pc); diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h index d968b7b15..0bf7a3cbd 100644 --- a/kernel/arch/i386/include/archconst.h +++ b/kernel/arch/i386/include/archconst.h @@ -54,6 +54,7 @@ #define INTR_PRIVILEGE 0 /* kernel and interrupt handlers */ #define TASK_PRIVILEGE 1 /* kernel tasks */ #define USER_PRIVILEGE 3 /* servers and user processes */ +#define RPL_MASK 0x03 /* bits in selector RPL */ /* 286 hardware constants. */ @@ -137,5 +138,6 @@ #define IOPL_MASK 0x003000 #define vir2phys(vir) (kinfo.data_base + (vir_bytes) (vir)) +#define phys2vir(ph) ((vir_bytes) (ph) - kinfo.data_base) #endif /* _I386_ACONST_H */ diff --git a/kernel/arch/i386/include/archtypes.h b/kernel/arch/i386/include/archtypes.h index ac17eeb7f..bdd8f8246 100644 --- a/kernel/arch/i386/include/archtypes.h +++ b/kernel/arch/i386/include/archtypes.h @@ -56,7 +56,7 @@ struct segdesc_s { /* segment descriptor for protected mode */ typedef struct segframe { reg_t p_ldt_sel; /* selector in gdt with ldt base and limit */ reg_t p_cr3; /* page table root */ - struct segdesc_s p_ldt[2+NR_REMOTE_SEGS]; /* CS, DS and remote */ + struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */ } segframe_t; /* Page fault event. Stored in process table. Only valid if PAGEFAULT @@ -68,5 +68,7 @@ struct pagefault u32_t pf_flags; /* Pagefault flags on stack. 
*/ }; +#define INMEMORY(p) (!p->p_seg.p_cr3 || ptproc == p) + #endif /* #ifndef _I386_TYPES_H */ diff --git a/kernel/arch/i386/klib386.s b/kernel/arch/i386/klib386.s index 0aff6c0bd..cc6cd748e 100755 --- a/kernel/arch/i386/klib386.s +++ b/kernel/arch/i386/klib386.s @@ -8,7 +8,6 @@ #include #include #include "../../const.h" -#include "vm.h" #include "sconst.h" ! This file contains a number of assembly code utility routines needed by the @@ -28,6 +27,7 @@ .define _intr_unmask ! enable an irq at the 8259 controller .define _intr_mask ! disable an irq .define _phys_copy ! copy data from anywhere to anywhere in memory +.define _phys_copy_fault! phys_copy pagefault .define _phys_memset ! write pattern anywhere in memory .define _mem_rdw ! copy one word from [segment:offset] .define _reset ! reset the system @@ -35,13 +35,12 @@ .define _level0 ! call a function at level 0 .define _read_cpu_flags ! read the cpu flags .define _read_cr0 ! read cr0 -.define _write_cr3 ! write cr3 -.define _last_cr3 +.define _getcr3val .define _write_cr0 ! write a value in cr0 .define _read_cr4 +.define _thecr3 .define _write_cr4 - -.define _kernel_cr3 +.define _catch_pagefaults ! The routines only guarantee to preserve the registers the C compiler ! expects to be preserved (ebx, esi, edi, ebp, esp, segment registers, and @@ -156,55 +155,6 @@ csinit: mov eax, DS_SELECTOR ret -!*===========================================================================* -!* cp_mess * -!*===========================================================================* -! PUBLIC void cp_mess(int src, phys_clicks src_clicks, vir_bytes src_offset, -! phys_clicks dst_clicks, vir_bytes dst_offset); -! This routine makes a fast copy of a message from anywhere in the address -! space to anywhere else. It also copies the source address provided as a -! parameter to the call into the first word of the destination message. -! -! Note that the message size, "Msize" is in DWORDS (not bytes) and must be set -! correctly. 
Changing the definition of message in the type file and not -! changing it here will lead to total disaster. -! -!CM_ARGS = 4 + 4 + 4 + 4 + 4 ! 4 + 4 + 4 + 4 + 4 -!! es ds edi esi eip proc scl sof dcl dof -! -! .align 16 -!_cp_mess: -! cld -! push esi -! push edi -! push ds -! push es -! -! mov eax, FLAT_DS_SELECTOR -! mov ds, ax -! mov es, ax -! -! mov esi, CM_ARGS+4(esp) ! src clicks -! shl esi, CLICK_SHIFT -! add esi, CM_ARGS+4+4(esp) ! src offset -! mov edi, CM_ARGS+4+4+4(esp) ! dst clicks -! shl edi, CLICK_SHIFT -! add edi, CM_ARGS+4+4+4+4(esp) ! dst offset -! -! mov eax, CM_ARGS(esp) ! process number of sender -! stos ! copy number of sender to dest message -! add esi, 4 ! do not copy first word -! mov ecx, Msize - 1 ! remember, first word does not count -! rep -! movs ! copy the message -! -! pop es -! pop ds -! pop edi -! pop esi -! ret ! that is all folks! -! - !*===========================================================================* !* exit * !*===========================================================================* @@ -236,8 +186,6 @@ _phys_insw: push edi push es - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov es, cx mov edx, 8(ebp) ! port to read from @@ -264,8 +212,6 @@ _phys_insb: push edi push es - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov es, cx mov edx, 8(ebp) ! port to read from @@ -293,8 +239,6 @@ _phys_outsw: push esi push ds - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov ds, cx mov edx, 8(ebp) ! port to write to @@ -322,8 +266,6 @@ _phys_outsb: push esi push ds - LOADKERNELCR3 - mov ecx, FLAT_DS_SELECTOR mov ds, cx mov edx, 8(ebp) ! port to write to @@ -416,7 +358,7 @@ dis_already: !*===========================================================================* !* phys_copy * !*===========================================================================* -! PUBLIC void phys_copy(phys_bytes source, phys_bytes destination, +! PUBLIC phys_bytes phys_copy(phys_bytes source, phys_bytes destination, ! phys_bytes bytecount); ! 
Copy a block of physical memory. @@ -430,8 +372,6 @@ _phys_copy: push edi push es - LOADKERNELCR3 - mov eax, FLAT_DS_SELECTOR mov es, ax @@ -457,6 +397,8 @@ pc_small: rep eseg movsb + mov eax, 0 ! 0 means: no fault +_phys_copy_fault: ! kernel can send us here pop es pop edi pop esi @@ -477,8 +419,6 @@ _phys_memset: push ebx push ds - LOADKERNELCR3 - mov esi, 8(ebp) mov eax, 16(ebp) mov ebx, FLAT_DS_SELECTOR @@ -633,14 +573,13 @@ _write_cr4: pop ebp ret + !*===========================================================================* -!* write_cr3 * +!* getcr3val * !*===========================================================================* -! PUBLIC void write_cr3(unsigned long value); -_write_cr3: - push ebp - mov ebp, esp - LOADCR3WITHEAX(0x22, 8(ebp)) - pop ebp +! PUBLIC unsigned long getcr3val(void); +_getcr3val: + mov eax, cr3 + mov (_thecr3), eax ret diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index a04b0e94c..11f673285 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -1,4 +1,5 @@ + #include "../../kernel.h" #include "../../proc.h" #include "../../vm.h" @@ -15,137 +16,246 @@ #include "proto.h" #include "../../proto.h" +#include "../../proto.h" #include "../../debug.h" -/* VM functions and data. 
*/ -PRIVATE u32_t vm_cr3; -PUBLIC u32_t kernel_cr3; -extern u32_t cswitch; -u32_t last_cr3 = 0; +#include "sha1.h" + +PRIVATE int psok = 0; + +#define PROCPDEPTR(pr, pi) ((u32_t *) ((u8_t *) vm_pagedirs +\ + I386_PAGE_SIZE * pr->p_nr + \ + I386_VM_PT_ENT_SIZE * pi)) + +u8_t *vm_pagedirs = NULL; + +#define NOPDE -1 +#define PDEMASK(n) (1L << (n)) +PUBLIC u32_t dirtypde; +#define WANT_FREEPDES (sizeof(dirtypde)*8-5) +PRIVATE int nfreepdes = 0, freepdes[WANT_FREEPDES], inusepde = NOPDE; #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0) -FORWARD _PROTOTYPE( void phys_put32, (phys_bytes addr, u32_t value) ); -FORWARD _PROTOTYPE( u32_t phys_get32, (phys_bytes addr) ); -FORWARD _PROTOTYPE( void vm_set_cr3, (u32_t value) ); +FORWARD _PROTOTYPE( u32_t phys_get32, (vir_bytes v) ); FORWARD _PROTOTYPE( void set_cr3, (void) ); FORWARD _PROTOTYPE( void vm_enable_paging, (void) ); -#if DEBUG_VMASSERT -#define vmassert(t) { \ - if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } } -#else -#define vmassert(t) { } -#endif - + /* *** Internal VM Functions *** */ -PUBLIC void vm_init(void) +PUBLIC void vm_init(struct proc *newptproc) { - int o; - phys_bytes p, pt_size; - phys_bytes vm_dir_base, vm_pt_base, phys_mem; - u32_t entry; - unsigned pages; - struct proc* rp; - struct proc *sys = proc_addr(SYSTEM); - static int init_done = 0; - - if (!vm_size) - minix_panic("i386_vm_init: no space for page tables", NO_NUM); - - if(init_done) - return; - - /* Align page directory */ - o= (vm_base % I386_PAGE_SIZE); - if (o != 0) - o= I386_PAGE_SIZE-o; - vm_dir_base= vm_base+o; - - /* Page tables start after the page directory */ - vm_pt_base= vm_dir_base+I386_PAGE_SIZE; - - pt_size= (vm_base+vm_size)-vm_pt_base; - pt_size -= (pt_size % I386_PAGE_SIZE); - - /* Compute the number of pages based on vm_mem_high */ - pages= (vm_mem_high-1)/I386_PAGE_SIZE + 1; - - if (pages * I386_VM_PT_ENT_SIZE > pt_size) - minix_panic("i386_vm_init: page table too small", NO_NUM); - - for (p= 0; 
p*I386_VM_PT_ENT_SIZE < pt_size; p++) - { - phys_mem= p*I386_PAGE_SIZE; - entry= phys_mem | I386_VM_USER | I386_VM_WRITE | - I386_VM_PRESENT; - if (phys_mem >= vm_mem_high) - entry= 0; -#if VM_KERN_NOPAGEZERO - if (phys_mem == (sys->p_memmap[T].mem_phys << CLICK_SHIFT) || - phys_mem == (sys->p_memmap[D].mem_phys << CLICK_SHIFT)) { - entry = 0; - } -#endif - phys_put32(vm_pt_base + p*I386_VM_PT_ENT_SIZE, entry); - } - - for (p= 0; p < I386_VM_DIR_ENTRIES; p++) - { - phys_mem= vm_pt_base + p*I386_PAGE_SIZE; - entry= phys_mem | I386_VM_USER | I386_VM_WRITE | - I386_VM_PRESENT; - if (phys_mem >= vm_pt_base + pt_size) - entry= 0; - phys_put32(vm_dir_base + p*I386_VM_PT_ENT_SIZE, entry); - } - - - /* Set this cr3 in all currently running processes for - * future context switches. - */ - for (rp=BEG_PROC_ADDR; rpp_seg.p_cr3 = vm_dir_base; - } - - kernel_cr3 = vm_dir_base; - - /* Set this cr3 now (not active until paging enabled). */ - vm_set_cr3(vm_dir_base); - - /* Actually enable paging (activating cr3 load above). */ + int i; + if(vm_running) + minix_panic("vm_init: vm_running", NO_NUM); + vm_set_cr3(newptproc); level0(vm_enable_paging); - - /* Don't do this init in the future. */ - init_done = 1; vm_running = 1; + } -PRIVATE void phys_put32(addr, value) -phys_bytes addr; -u32_t value; -{ - phys_copy(vir2phys((vir_bytes)&value), addr, sizeof(value)); + +#define TYPEDIRECT 0 +#define TYPEPROCMAP 1 +#define TYPEPHYS 2 + +/* This macro sets up a mapping from within the kernel's address + * space to any other area of memory, either straight physical + * memory (PROC == NULL) or a process view of memory, in 4MB chunks. + * It recognizes PROC having kernel address space as a special case. + * + * It sets PTR to the pointer within kernel address space at the start + * of the 4MB chunk, and OFFSET to the offset within that chunk + * that corresponds to LINADDR. 
+ * + * It needs FREEPDE (available and addressable PDE within kernel + * address space), SEG (hardware segment), VIRT (in-datasegment + * address if known). + */ +#define CREATEPDE(PROC, PTR, LINADDR, REMAIN, BYTES, PDE, TYPE) { \ + u32_t *pdeptr = NULL; \ + int proc_pde_index; \ + proc_pde_index = I386_VM_PDE(LINADDR); \ + PDE = NOPDE; \ + if((PROC) && (((PROC) == ptproc) || !HASPT(PROC))) { \ + PTR = LINADDR; \ + TYPE = TYPEDIRECT; \ + } else { \ + int fp; \ + int mustinvl; \ + u32_t pdeval, *pdevalptr, mask; \ + phys_bytes offset; \ + vmassert(psok); \ + if(PROC) { \ + TYPE = TYPEPROCMAP; \ + vmassert(!iskernelp(PROC)); \ + vmassert(HASPT(PROC)); \ + pdeptr = PROCPDEPTR(PROC, proc_pde_index); \ + pdeval = *pdeptr; \ + } else { \ + TYPE = TYPEPHYS; \ + pdeval = (LINADDR & I386_VM_ADDR_MASK_4MB) | \ + I386_VM_BIGPAGE | I386_VM_PRESENT | \ + I386_VM_WRITE | I386_VM_USER; \ + } \ + for(fp = 0; fp < nfreepdes; fp++) { \ + int k = freepdes[fp]; \ + if(inusepde == k) \ + continue; \ + *PROCPDEPTR(ptproc, k) = 0; \ + PDE = k; \ + vmassert(k >= 0); \ + vmassert(k < sizeof(dirtypde)*8); \ + mask = PDEMASK(PDE); \ + if(dirtypde & mask) \ + continue; \ + break; \ + } \ + vmassert(PDE != NOPDE); \ + vmassert(mask); \ + if(dirtypde & mask) { \ + mustinvl = 1; \ + } else { \ + mustinvl = 0; \ + } \ + inusepde = PDE; \ + *PROCPDEPTR(ptproc, PDE) = pdeval; \ + offset = LINADDR & I386_VM_OFFSET_MASK_4MB; \ + PTR = I386_BIG_PAGE_SIZE*PDE + offset; \ + REMAIN = MIN(REMAIN, I386_BIG_PAGE_SIZE - offset); \ + if(mustinvl) { \ + level0(reload_cr3); \ + } \ + } \ } +#define DONEPDE(PDE) { \ + if(PDE != NOPDE) { \ + vmassert(PDE > 0); \ + vmassert(PDE < sizeof(dirtypde)*8); \ + dirtypde |= PDEMASK(PDE); \ + } \ +} + +#define WIPEPDE(PDE) { \ + if(PDE != NOPDE) { \ + vmassert(PDE > 0); \ + vmassert(PDE < sizeof(dirtypde)*8); \ + *PROCPDEPTR(ptproc, PDE) = 0; \ + } \ +} + +/*===========================================================================* + * lin_lin_copy * + 
*===========================================================================*/ +int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr, + struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes) +{ + u32_t addr; + int o1, o2; + int procslot; + int firstloop = 1; + + NOREC_ENTER(linlincopy); + + vmassert(vm_running); + vmassert(nfreepdes >= 3); + + vmassert(ptproc); + vmassert(proc_ptr); + vmassert(read_cr3() == ptproc->p_seg.p_cr3); + + procslot = ptproc->p_nr; + + vmassert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES); + + while(bytes > 0) { + phys_bytes srcptr, dstptr; + vir_bytes chunk = bytes; + int srcpde, dstpde; + int srctype, dsttype; + + /* Set up 4MB ranges. */ + inusepde = NOPDE; + CREATEPDE(srcproc, srcptr, srclinaddr, chunk, bytes, srcpde, srctype); + CREATEPDE(dstproc, dstptr, dstlinaddr, chunk, bytes, dstpde, dsttype); + + /* Copy pages. */ + PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr); + + DONEPDE(srcpde); + DONEPDE(dstpde); + + if(addr) { + /* If addr is nonzero, a page fault was caught. */ + + if(addr >= srcptr && addr < (srcptr + chunk)) { + WIPEPDE(srcpde); + WIPEPDE(dstpde); + NOREC_RETURN(linlincopy, EFAULT_SRC); + } + if(addr >= dstptr && addr < (dstptr + chunk)) { + WIPEPDE(srcpde); + WIPEPDE(dstpde); + NOREC_RETURN(linlincopy, EFAULT_DST); + } + + minix_panic("lin_lin_copy fault out of range", NO_NUM); + + /* Not reached. */ + NOREC_RETURN(linlincopy, EFAULT); + } + + WIPEPDE(srcpde); + WIPEPDE(dstpde); + + /* Update counter and addresses for next iteration, if any. 
*/ + bytes -= chunk; + srclinaddr += chunk; + dstlinaddr += chunk; + + firstloop = 0; + } + + NOREC_RETURN(linlincopy, OK); +} + + PRIVATE u32_t phys_get32(addr) phys_bytes addr; { - u32_t value; + u32_t v; + int r; - phys_copy(addr, vir2phys((vir_bytes)&value), sizeof(value)); + if(!vm_running) { + phys_copy(addr, vir2phys(&v), sizeof(v)); + return v; + } - return value; + if((r=lin_lin_copy(NULL, addr, + proc_addr(SYSTEM), vir2phys(&v), sizeof(v))) != OK) { + minix_panic("lin_lin_copy for phys_get32 failed", r); + } + + return v; } -PRIVATE void vm_set_cr3(value) -u32_t value; +PRIVATE u32_t vm_cr3; /* temp arg to level0() func */ + +PUBLIC void vm_set_cr3(struct proc *newptproc) { - vm_cr3= value; - level0(set_cr3); + int u = 0; + if(!intr_disabled()) { lock; u = 1; } + vm_cr3= newptproc->p_seg.p_cr3; + if(vm_cr3) { + vmassert(intr_disabled()); + level0(set_cr3); + vmassert(intr_disabled()); + ptproc = newptproc; + vmassert(intr_disabled()); + } + if(u) { unlock; } } PRIVATE void set_cr3() @@ -153,10 +263,42 @@ PRIVATE void set_cr3() write_cr3(vm_cr3); } +char *cr0_str(u32_t e) +{ + static char str[80]; + strcpy(str, ""); +#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~v; } } while(0) + FLAG(I386_CR0_PE); + FLAG(I386_CR0_MP); + FLAG(I386_CR0_EM); + FLAG(I386_CR0_TS); + FLAG(I386_CR0_ET); + FLAG(I386_CR0_PG); + FLAG(I386_CR0_WP); + if(e) { strcat(str, " (++)"); } + return str; +} + +char *cr4_str(u32_t e) +{ + static char str[80]; + strcpy(str, ""); + FLAG(I386_CR4_VME); + FLAG(I386_CR4_PVI); + FLAG(I386_CR4_TSD); + FLAG(I386_CR4_DE); + FLAG(I386_CR4_PSE); + FLAG(I386_CR4_PAE); + FLAG(I386_CR4_MCE); + FLAG(I386_CR4_PGE); + if(e) { strcat(str, " (++)"); } + return str; +} + PRIVATE void vm_enable_paging(void) { u32_t cr0, cr4; - int psok, pgeok; + int pgeok; psok = _cpufeature(_CPUF_I386_PSE); pgeok = _cpufeature(_CPUF_I386_PGE); @@ -166,19 +308,26 @@ PRIVATE void vm_enable_paging(void) /* First clear PG and PGE flag, as PGE must be enabled after 
PG. */ write_cr0(cr0 & ~I386_CR0_PG); - write_cr4(cr4 & ~I386_CR4_PGE); + write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE)); cr0= read_cr0(); cr4= read_cr4(); + /* Our first page table contains 4MB entries. */ + if(psok) + cr4 |= I386_CR4_PSE; + + write_cr4(cr4); + /* First enable paging, then enable global page flag. */ - write_cr0(cr0 | I386_CR0_PG); + cr0 |= I386_CR0_PG; + write_cr0(cr0 ); + cr0 |= I386_CR0_WP; + write_cr0(cr0); /* May we enable these features? */ if(pgeok) cr4 |= I386_CR4_PGE; - if(psok) - cr4 |= I386_CR4_PSE; write_cr4(cr4); } @@ -315,6 +464,7 @@ vir_bytes bytes; /* # of bytes to be copied */ return phys; } + /*===========================================================================* * vm_lookup * *===========================================================================*/ @@ -323,6 +473,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, u32_t *root, *pt; int pde, pte; u32_t pde_v, pte_v; + NOREC_ENTER(vmlookup); vmassert(proc); vmassert(physical); @@ -330,7 +481,7 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, if(!HASPT(proc)) { *physical = virtual; - return OK; + NOREC_RETURN(vmlookup, OK); } /* Retrieve page directory entry. */ @@ -339,39 +490,35 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, pde = I386_VM_PDE(virtual); vmassert(pde >= 0 && pde < I386_VM_DIR_ENTRIES); pde_v = phys_get32((u32_t) (root + pde)); + if(!(pde_v & I386_VM_PRESENT)) { -#if 0 - kprintf("vm_lookup: %d:%s:0x%lx: cr3 0x%lx: pde %d not present\n", - proc->p_endpoint, proc->p_name, virtual, root, pde); - kprintf("kernel stack: "); - util_stacktrace(); -#endif - return EFAULT; + NOREC_RETURN(vmlookup, EFAULT); } - /* Retrieve page table entry. 
*/ - pt = (u32_t *) I386_VM_PFA(pde_v); - vmassert(!((u32_t) pt % I386_PAGE_SIZE)); - pte = I386_VM_PTE(virtual); - vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES); - pte_v = phys_get32((u32_t) (pt + pte)); - if(!(pte_v & I386_VM_PRESENT)) { -#if 0 - kprintf("vm_lookup: %d:%s:0x%lx: cr3 %lx: pde %d: pte %d not present\n", - proc->p_endpoint, proc->p_name, virtual, root, pde, pte); - kprintf("kernel stack: "); - util_stacktrace(); -#endif - return EFAULT; + /* We don't expect to ever see this. */ + if(pde_v & I386_VM_BIGPAGE) { + *physical = pde_v & I386_VM_ADDR_MASK_4MB; + if(ptent) *ptent = pde_v; + *physical += virtual & I386_VM_OFFSET_MASK_4MB; + } else { + /* Retrieve page table entry. */ + pt = (u32_t *) I386_VM_PFA(pde_v); + vmassert(!((u32_t) pt % I386_PAGE_SIZE)); + pte = I386_VM_PTE(virtual); + vmassert(pte >= 0 && pte < I386_VM_PT_ENTRIES); + pte_v = phys_get32((u32_t) (pt + pte)); + if(!(pte_v & I386_VM_PRESENT)) { + NOREC_RETURN(vmlookup, EFAULT); + } + + if(ptent) *ptent = pte_v; + + /* Actual address now known; retrieve it and add page offset. */ + *physical = I386_VM_PFA(pte_v); + *physical += virtual % I386_PAGE_SIZE; } - if(ptent) *ptent = pte_v; - - /* Actual address now known; retrieve it and add page offset. 
*/ - *physical = I386_VM_PFA(pte_v); - *physical += virtual % I386_PAGE_SIZE; - - return OK; + NOREC_RETURN(vmlookup, OK); } /* From virtual address v in process p, @@ -390,54 +537,6 @@ PUBLIC int vm_lookup(struct proc *proc, vir_bytes virtual, vir_bytes *physical, return r; \ } } } -/*===========================================================================* - * vm_copy * - *===========================================================================*/ -int vm_copy(vir_bytes src, struct proc *srcproc, - vir_bytes dst, struct proc *dstproc, phys_bytes bytes) -{ -#define WRAPS(v) (ULONG_MAX - (v) <= bytes) - - if(WRAPS(src) || WRAPS(dst)) - minix_panic("vm_copy: linear address wraps", NO_NUM); - - while(bytes > 0) { - u32_t n, flags; - phys_bytes p_src, p_dst; -#define PAGEREMAIN(v) (I386_PAGE_SIZE - ((v) % I386_PAGE_SIZE)) - - /* We can copy this number of bytes without - * crossing a page boundary, but don't copy more - * than asked. - */ - n = MIN(PAGEREMAIN(src), PAGEREMAIN(dst)); - n = MIN(n, bytes); - vmassert(n > 0); - vmassert(n <= I386_PAGE_SIZE); - - /* Convert both virtual addresses to physical and do - * copy. - */ - LOOKUP(p_src, srcproc, src, NULL); - LOOKUP(p_dst, dstproc, dst, &flags); - if(!(flags & I386_VM_WRITE)) { - kprintf("vm_copy: copying to nonwritable page\n"); - kprintf("kernel stack: "); - util_stacktrace(); - return EFAULT; - } - phys_copy(p_src, p_dst, n); - - /* Book number of bytes copied. 
*/ - vmassert(bytes >= n); - bytes -= n; - src += n; - dst += n; - } - - return OK; -} - /*===========================================================================* * vm_contiguous * *===========================================================================*/ @@ -493,155 +592,99 @@ PUBLIC int vm_contiguous(struct proc *targetproc, u32_t vir_buf, size_t bytes) boundaries++; } - if(verbose_vm) - kprintf("vm_contiguous: yes (%d boundaries tested)\n", - boundaries); - return 1; } -int vm_checkrange_verbose = 0; +/*===========================================================================* + * vm_suspend * + *===========================================================================*/ +PUBLIC int vm_suspend(struct proc *caller, struct proc *target, + vir_bytes linaddr, vir_bytes len, int wrflag, int type) +{ + /* This range is not OK for this process. Set parameters + * of the request and notify VM about the pending request. + */ + vmassert(!RTS_ISSET(caller, VMREQUEST)); + vmassert(!RTS_ISSET(target, VMREQUEST)); + + RTS_LOCK_SET(caller, VMREQUEST); + +#if DEBUG_VMASSERT + caller->p_vmrequest.stacktrace[0] = '\0'; + util_stacktrace_strcat(caller->p_vmrequest.stacktrace); +#endif + + caller->p_vmrequest.writeflag = 1; + caller->p_vmrequest.start = linaddr; + caller->p_vmrequest.length = len; + caller->p_vmrequest.who = target->p_endpoint; + caller->p_vmrequest.type = type; + + /* Connect caller on vmrequest wait queue. 
*/ + if(!(caller->p_vmrequest.nextrequestor = vmrequest)) + mini_notify(proc_addr(SYSTEM), VM_PROC_NR); + vmrequest = caller; +} /*===========================================================================* - * vm_checkrange * + * delivermsg * *===========================================================================*/ -PUBLIC int vm_checkrange(struct proc *caller, struct proc *target, - vir_bytes vir, vir_bytes bytes, int wrfl, int checkonly) +int delivermsg(struct proc *rp) { - u32_t flags, po, v; + phys_bytes addr; int r; + NOREC_ENTER(deliver); - if(!HASPT(target)) - return OK; + vmassert(rp->p_misc_flags & MF_DELIVERMSG); + vmassert(rp->p_delivermsg.m_source != NONE); - /* If caller has had a reply to this request, return it. */ - if(RTS_ISSET(caller, VMREQUEST)) { - if(caller->p_vmrequest.who == target->p_endpoint) { - if(caller->p_vmrequest.vmresult == VMSUSPEND) - minix_panic("check sees VMSUSPEND?", NO_NUM); - RTS_LOCK_UNSET(caller, VMREQUEST); -#if 0 - kprintf("SYSTEM: vm_checkrange: returning vmresult %d\n", - caller->p_vmrequest.vmresult); -#endif - return caller->p_vmrequest.vmresult; - } else { -#if 0 - kprintf("SYSTEM: vm_checkrange: caller has a request for %d, " - "but our target is %d\n", - caller->p_vmrequest.who, target->p_endpoint); -#endif - } + vmassert(rp->p_delivermsg_lin); +#if DEBUG_VMASSERT + if(rp->p_delivermsg_lin != + umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message))) { + printf("vir: 0x%lx lin was: 0x%lx umap now: 0x%lx\n", + rp->p_delivermsg_vir, rp->p_delivermsg_lin, + umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message))); + minix_panic("that's wrong", NO_NUM); } - po = vir % I386_PAGE_SIZE; - if(po > 0) { - vir -= po; - bytes += po; +#endif + + vm_set_cr3(rp); + + PHYS_COPY_CATCH(vir2phys(&rp->p_delivermsg), + rp->p_delivermsg_lin, sizeof(message), addr); + + if(addr) { + vm_suspend(rp, rp, rp->p_delivermsg_lin, sizeof(message), 1, + VMSTYPE_DELIVERMSG); + r = VMSUSPEND; + } else { +#if DEBUG_VMASSERT + 
rp->p_delivermsg.m_source = NONE; + rp->p_delivermsg_lin = 0; +#endif + rp->p_misc_flags &= ~MF_DELIVERMSG; + r = OK; } - vmassert(target); - vmassert(bytes > 0); - - for(v = vir; v < vir + bytes; v+= I386_PAGE_SIZE) { - u32_t phys; - - /* If page exists and it's writable if desired, we're OK - * for this page. - */ - if(vm_lookup(target, v, &phys, &flags) == OK && - !(wrfl && !(flags & I386_VM_WRITE))) { - if(vm_checkrange_verbose) { -#if 0 - kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, OK\n", - target->p_name, target->p_endpoint, v, wrfl, flags, phys); -#endif - } - continue; - } - - if(vm_checkrange_verbose) { - kprintf("SYSTEM: checkrange:%s:%d: 0x%lx: write 0x%lx, flags 0x%lx, phys 0x%lx, NOT OK\n", - target->p_name, target->p_endpoint, v, wrfl, flags, phys); - } - - if(checkonly) { - return VMSUSPEND; - } - - /* This range is not OK for this process. Set parameters - * of the request and notify VM about the pending request. - */ - if(RTS_ISSET(caller, VMREQUEST)) - minix_panic("VMREQUEST already set", caller->p_endpoint); - RTS_LOCK_SET(caller, VMREQUEST); - - /* Set parameters in caller. */ - caller->p_vmrequest.writeflag = wrfl; - caller->p_vmrequest.start = vir; - caller->p_vmrequest.length = bytes; - caller->p_vmrequest.who = target->p_endpoint; - - /* Set caller in target. */ - target->p_vmrequest.requestor = caller; - - /* Connect caller on vmrequest wait queue. 
*/ - caller->p_vmrequest.nextrequestor = vmrequest; - vmrequest = caller; - if(!caller->p_vmrequest.nextrequestor) { - int n = 0; - struct proc *vmr; - for(vmr = vmrequest; vmr; vmr = vmr->p_vmrequest.nextrequestor) - n++; - soft_notify(VM_PROC_NR); -#if 0 - kprintf("(%d) ", n); - kprintf("%d/%d ", - caller->p_endpoint, target->p_endpoint); - util_stacktrace(); -#endif - } - -#if 0 - kprintf("SYSTEM: vm_checkrange: range bad for " - "target %s:0x%lx-0x%lx, caller %s\n", - target->p_name, vir, vir+bytes, caller->p_name); - - kprintf("vm_checkrange kernel trace: "); - util_stacktrace(); - kprintf("target trace: "); - proc_stacktrace(target); -#endif - - if(target->p_endpoint == VM_PROC_NR) { - kprintf("caller trace: "); - proc_stacktrace(caller); - kprintf("target trace: "); - proc_stacktrace(target); - minix_panic("VM ranges should be OK", NO_NUM); - } - - return VMSUSPEND; - } - - return OK; + NOREC_RETURN(deliver, r); } char *flagstr(u32_t e, int dir) { static char str[80]; strcpy(str, ""); -#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); } } while(0) FLAG(I386_VM_PRESENT); FLAG(I386_VM_WRITE); FLAG(I386_VM_USER); FLAG(I386_VM_PWT); FLAG(I386_VM_PCD); + FLAG(I386_VM_GLOBAL); if(dir) FLAG(I386_VM_BIGPAGE); /* Page directory entry only */ else FLAG(I386_VM_DIRTY); /* Page table entry only */ - return str; } @@ -658,8 +701,9 @@ void vm_pt_print(u32_t *pagetable, u32_t v) if(!(pte_v & I386_VM_PRESENT)) continue; pfa = I386_VM_PFA(pte_v); - kprintf("%4d:%08lx:%08lx ", - pte, v + I386_PAGE_SIZE*pte, pfa); + kprintf("%4d:%08lx:%08lx %2s ", + pte, v + I386_PAGE_SIZE*pte, pfa, + (pte_v & I386_VM_WRITE) ? 
"rw":"RO"); col++; if(col == 3) { kprintf("\n"); col = 0; } } @@ -668,31 +712,85 @@ void vm_pt_print(u32_t *pagetable, u32_t v) return; } -/*===========================================================================* - * vm_print * - *===========================================================================*/ void vm_print(u32_t *root) { int pde; vmassert(!((u32_t) root % I386_PAGE_SIZE)); - for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) { + printf("page table 0x%lx:\n", root); + + for(pde = 10; pde < I386_VM_DIR_ENTRIES; pde++) { u32_t pde_v; u32_t *pte_a; pde_v = phys_get32((u32_t) (root + pde)); if(!(pde_v & I386_VM_PRESENT)) continue; - pte_a = (u32_t *) I386_VM_PFA(pde_v); - kprintf("%4d: pt %08lx %s\n", - pde, pte_a, flagstr(pde_v, 1)); - vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE); + if(pde_v & I386_VM_BIGPAGE) { + kprintf("%4d: 0x%lx, flags %s\n", + pde, I386_VM_PFA(pde_v), flagstr(pde_v, 1)); + } else { + pte_a = (u32_t *) I386_VM_PFA(pde_v); + kprintf("%4d: pt %08lx %s\n", + pde, pte_a, flagstr(pde_v, 1)); + vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE); + kprintf("\n"); + } } return; } +u32_t thecr3; + +u32_t read_cr3(void) +{ + level0(getcr3val); + return thecr3; +} + + +/*===========================================================================* + * lin_memset * + *===========================================================================*/ +int vm_phys_memset(phys_bytes ph, u8_t c, phys_bytes bytes) +{ + char *v; + u32_t p; + NOREC_ENTER(physmemset); + + p = c | (c << 8) | (c << 16) | (c << 24); + + if(!vm_running) { + phys_memset(ph, p, bytes); + NOREC_RETURN(physmemset, OK); + } + + vmassert(nfreepdes >= 3); + + /* With VM, we have to map in the physical memory. + * We can do this 4MB at a time. 
+ */ + while(bytes > 0) { + int pde, t; + vir_bytes chunk = bytes; + phys_bytes ptr; + inusepde = NOPDE; + CREATEPDE(((struct proc *) NULL), ptr, ph, chunk, bytes, pde, t); + /* We can memset as many bytes as we have remaining, + * or as many as remain in the 4MB chunk we mapped in. + */ + phys_memset(ptr, p, chunk); + DONEPDE(pde); + bytes -= chunk; + ph += chunk; + } + + + NOREC_RETURN(physmemset, OK); +} + /*===========================================================================* * virtual_copy_f * *===========================================================================*/ @@ -710,6 +808,7 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ int seg_index; int i, r; struct proc *procs[2]; + NOREC_ENTER(virtualcopy); /* Check copy count. */ if (bytes <= 0) return(EDOM); @@ -735,7 +834,9 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ switch(type) { case LOCAL_SEG: case LOCAL_VM_SEG: - if(!p) return EDEADSRCDST; + if(!p) { + NOREC_RETURN(virtualcopy, EDEADSRCDST); + } seg_index = vir_addr[i]->segment & SEGMENT_INDEX; if(type == LOCAL_SEG) phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset, @@ -751,7 +852,9 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ } break; case REMOTE_SEG: - if(!p) return EDEADSRCDST; + if(!p) { + NOREC_RETURN(virtualcopy, EDEADSRCDST); + } seg_index = vir_addr[i]->segment & SEGMENT_INDEX; phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes); break; @@ -763,43 +866,96 @@ int vmcheck; /* if nonzero, can return VMSUSPEND */ case PHYS_SEG: phys_addr[i] = vir_addr[i]->offset; break; - case GRANT_SEG: - phys_addr[i] = umap_grant(p, vir_addr[i]->offset, bytes); - break; default: kprintf("virtual_copy: strange type 0x%x\n", type); - return(EINVAL); + NOREC_RETURN(virtualcopy, EINVAL); } /* Check if mapping succeeded. 
*/ if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG) { kprintf("virtual_copy EFAULT\n"); - return(EFAULT); + NOREC_RETURN(virtualcopy, EFAULT); } } - if(vmcheck && procs[_SRC_]) - CHECKRANGE_OR_SUSPEND(procs[_SRC_], phys_addr[_SRC_], bytes, 0); - if(vmcheck && procs[_DST_]) - CHECKRANGE_OR_SUSPEND(procs[_DST_], phys_addr[_DST_], bytes, 1); + if(vm_running) { + int r; + struct proc *caller; -#define NOPT(p) (!(p) || !HASPT(p)) - /* Now copy bytes between physical addresseses. */ - if(NOPT(procs[_SRC_]) && NOPT(procs[_DST_])) { - /* Without vm, address ranges actually are physical. */ - phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes); - r = OK; - } else { - /* With vm, addresses need further interpretation. */ - r = vm_copy(phys_addr[_SRC_], procs[_SRC_], - phys_addr[_DST_], procs[_DST_], (phys_bytes) bytes); - if(r != OK) { - kprintf("vm_copy: %lx to %lx failed\n", - phys_addr[_SRC_],phys_addr[_DST_]); + caller = proc_addr(who_p); + + if(RTS_ISSET(caller, VMREQUEST)) { + struct proc *target; + int pn; + vmassert(caller->p_vmrequest.vmresult != VMSUSPEND); + RTS_LOCK_UNSET(caller, VMREQUEST); + if(caller->p_vmrequest.vmresult != OK) { + printf("virtual_copy: returning VM error %d\n", + caller->p_vmrequest.vmresult); + NOREC_RETURN(virtualcopy, caller->p_vmrequest.vmresult); + } } + + if((r=lin_lin_copy(procs[_SRC_], phys_addr[_SRC_], + procs[_DST_], phys_addr[_DST_], bytes)) != OK) { + struct proc *target; + int wr; + phys_bytes lin; + if(r != EFAULT_SRC && r != EFAULT_DST) + minix_panic("lin_lin_copy failed", r); + if(!vmcheck) { + NOREC_RETURN(virtualcopy, r); + } + + vmassert(procs[_SRC_] && procs[_DST_]); + + if(r == EFAULT_SRC) { + lin = phys_addr[_SRC_]; + target = procs[_SRC_]; + wr = 0; + } else if(r == EFAULT_DST) { + lin = phys_addr[_DST_]; + target = procs[_DST_]; + wr = 1; + } else { + minix_panic("r strange", r); + } + +#if 0 + printf("virtual_copy: suspending caller %d / %s, target %d / %s\n", + caller->p_endpoint, 
caller->p_name, + target->p_endpoint, target->p_name); +#endif + + vmassert(k_reenter == -1); + vmassert(proc_ptr->p_endpoint == SYSTEM); + vm_suspend(caller, target, lin, bytes, wr, VMSTYPE_KERNELCALL); + + NOREC_RETURN(virtualcopy, VMSUSPEND); + } + + NOREC_RETURN(virtualcopy, OK); } - return(r); + vmassert(!vm_running); + + /* can't copy to/from process with PT without VM */ +#define NOPT(p) (!(p) || !HASPT(p)) + if(!NOPT(procs[_SRC_])) { + kprintf("ignoring page table src: %s / %d at 0x%lx\n", + procs[_SRC_]->p_name, procs[_SRC_]->p_endpoint, procs[_SRC_]->p_seg.p_cr3); +} + if(!NOPT(procs[_DST_])) { + kprintf("ignoring page table dst: %s / %d at 0x%lx\n", + procs[_DST_]->p_name, procs[_DST_]->p_endpoint, + procs[_DST_]->p_seg.p_cr3); + } + + /* Now copy bytes between physical addresseses. */ + if(phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes)) + NOREC_RETURN(virtualcopy, EFAULT); + + NOREC_RETURN(virtualcopy, OK); } /*===========================================================================* @@ -821,6 +977,25 @@ PUBLIC int data_copy( return virtual_copy(&src, &dst, bytes); } +/*===========================================================================* + * data_copy_vmcheck * + *===========================================================================*/ +PUBLIC int data_copy_vmcheck( + endpoint_t from_proc, vir_bytes from_addr, + endpoint_t to_proc, vir_bytes to_addr, + size_t bytes) +{ + struct vir_addr src, dst; + + src.segment = dst.segment = D; + src.offset = from_addr; + dst.offset = to_addr; + src.proc_nr_e = from_proc; + dst.proc_nr_e = to_proc; + + return virtual_copy_vmcheck(&src, &dst, bytes); +} + /*===========================================================================* * arch_pre_exec * *===========================================================================*/ @@ -852,4 +1027,10 @@ PUBLIC int arch_umap(struct proc *pr, vir_bytes offset, vir_bytes count, return EINVAL; } - +/* VM reports page directory slot we're 
allowed to use freely. */ +void i386_freepde(int pde) +{ + if(nfreepdes >= WANT_FREEPDES) + return; + freepdes[nfreepdes++] = pde; +} diff --git a/kernel/arch/i386/mpx386.s b/kernel/arch/i386/mpx386.s index 066df8573..3c2248c48 100755 --- a/kernel/arch/i386/mpx386.s +++ b/kernel/arch/i386/mpx386.s @@ -60,7 +60,6 @@ begbss: #include #include #include "../../const.h" -#include "vm.h" #include "sconst.h" /* Selected 386 tss offsets. */ @@ -74,9 +73,8 @@ begbss: .define _restart .define save -.define _kernel_cr3 -.define _pagefault_cr2 -.define _pagefault_count +.define _reload_cr3 +.define _write_cr3 ! write cr3 .define errexception .define exception1 @@ -101,6 +99,8 @@ begbss: .define _params_size .define _params_offset .define _mon_ds +.define _schedcheck +.define _dirtypde .define _hwint00 ! handlers for hardware interrupts .define _hwint01 @@ -218,12 +218,6 @@ csinit: ltr ax push 0 ! set flags to known good state popf ! esp, clear nested task and int enable -#if VM_KERN_NOPAGEZERO - jmp laststep - -.align I386_PAGE_SIZE -laststep: -#endif jmp _main ! main() @@ -239,7 +233,6 @@ laststep: #define hwint_master(irq) \ call save /* save interrupted process state */;\ push (_irq_handlers+4*irq) /* irq_handlers[irq] */;\ - LOADCR3WITHEAX(irq, (_kernel_cr3)) /* switch to kernel page table */;\ call _intr_handle /* intr_handle(irq_handlers[irq]) */;\ pop ecx ;\ cmp (_irq_actids+4*irq), 0 /* interrupt still active? */;\ @@ -291,7 +284,6 @@ _hwint07: ! Interrupt routine for irq 7 (printer) #define hwint_slave(irq) \ call save /* save interrupted process state */;\ push (_irq_handlers+4*irq) /* irq_handlers[irq] */;\ - LOADCR3WITHEAX(irq, (_kernel_cr3)) /* switch to kernel page table */;\ call _intr_handle /* intr_handle(irq_handlers[irq]) */;\ pop ecx ;\ cmp (_irq_actids+4*irq), 0 /* interrupt still active? */;\ @@ -398,11 +390,9 @@ _p_s_call: push eax ! source / destination push ecx ! call number (ipc primitive to use) -! 
LOADCR3WITHEAX(0x20, (_kernel_cr3)) - call _sys_call ! sys_call(call_nr, src_dst, m_ptr, bit_map) ! caller is now explicitly in proc_ptr - mov AXREG(esi), eax ! sys_call MUST PRESERVE si + mov AXREG(esi), eax ! Fall into code to restart proc/task running. @@ -413,14 +403,21 @@ _restart: ! Restart the current process or the next process if it is set. - cmp (_next_ptr), 0 ! see if another process is scheduled - jz 0f - mov eax, (_next_ptr) - mov (_proc_ptr), eax ! schedule new process - mov (_next_ptr), 0 -0: mov esp, (_proc_ptr) ! will assume P_STACKBASE == 0 + cli + call _schedcheck ! ask C function who we're running + mov esp, (_proc_ptr) ! will assume P_STACKBASE == 0 lldt P_LDT_SEL(esp) ! enable process' segment descriptors - LOADCR3WITHEAX(0x21, P_CR3(esp)) ! switch to process page table + cmp P_CR3(esp), 0 ! process does not have its own PT + jz 0f + mov eax, P_CR3(esp) + cmp eax, (loadedcr3) + jz 0f + mov cr3, eax + mov (loadedcr3), eax + mov eax, (_proc_ptr) + mov (_ptproc), eax + mov (_dirtypde), 0 +0: lea eax, P_STACKTOP(esp) ! arrange for next interrupt mov (_tss+TSS3_S_SP0), eax ! to save state in process table restart1: @@ -496,8 +493,7 @@ _page_fault: push PAGE_FAULT_VECTOR push eax mov eax, cr2 -sseg mov (_pagefault_cr2), eax -sseg inc (_pagefault_count) +sseg mov (pagefaultcr2), eax pop eax jmp errexception @@ -526,19 +522,26 @@ errexception: sseg pop (ex_number) sseg pop (trap_errno) exception1: ! Common for all exceptions. + sseg mov (old_eax_ptr), esp ! where will eax be saved? + sseg sub (old_eax_ptr), PCREG-AXREG ! here + push eax ! eax is scratch register mov eax, 0+4(esp) ! old eip sseg mov (old_eip), eax + mov eax, esp + add eax, 4 + sseg mov (old_eip_ptr), eax movzx eax, 4+4(esp) ! old cs sseg mov (old_cs), eax mov eax, 8+4(esp) ! 
old eflags sseg mov (old_eflags), eax - LOADCR3WITHEAX(0x24, (_kernel_cr3)) - pop eax call save + push (pagefaultcr2) + push (old_eax_ptr) + push (old_eip_ptr) push (old_eflags) push (old_cs) push (old_eip) @@ -546,7 +549,38 @@ exception1: ! Common for all exceptions. push (ex_number) call _exception ! (ex_number, trap_errno, old_eip, ! old_cs, old_eflags) - add esp, 5*4 + add esp, 8*4 + ret + + +!*===========================================================================* +!* write_cr3 * +!*===========================================================================* +! PUBLIC void write_cr3(unsigned long value); +_write_cr3: + push ebp + mov ebp, esp + mov eax, 8(ebp) + cmp eax, (loadedcr3) + jz 0f + mov cr3, eax + mov (loadedcr3), eax + mov (_dirtypde), 0 +0: + pop ebp + ret + +!*===========================================================================* +!* reload_cr3 * +!*===========================================================================* +! PUBLIC void reload_cr3(void); +_reload_cr3: + push ebp + mov ebp, esp + mov (_dirtypde), 0 + mov eax, cr3 + mov cr3, eax + pop ebp ret !*===========================================================================* @@ -556,24 +590,12 @@ _level0_call: call save jmp (_level0_func) -!*===========================================================================* -!* load_kernel_cr3 * -!*===========================================================================* -.align 16 -_load_kernel_cr3: - mov eax, (_kernel_cr3) - mov cr3, eax - ret - !*===========================================================================* !* data * !*===========================================================================* .sect .rom ! Before the string table please .data2 0x526F ! this must be the first data entry (magic #) -#if VM_KERN_NOPAGEZERO -.align I386_PAGE_SIZE -#endif .sect .bss k_stack: @@ -581,7 +603,11 @@ k_stack: k_stktop: ! 
top of kernel stack .comm ex_number, 4 .comm trap_errno, 4 + .comm old_eip_ptr, 4 + .comm old_eax_ptr, 4 .comm old_eip, 4 .comm old_cs, 4 .comm old_eflags, 4 + .comm pagefaultcr2, 4 + .comm loadedcr3, 4 diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index 398f8d19c..b2ae9eaff 100755 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -167,6 +167,11 @@ PUBLIC void prot_init(void) unsigned ldt_index; register struct proc *rp; + /* Click-round kernel. */ + if(kinfo.data_base % CLICK_SIZE) + minix_panic("kinfo.data_base not aligned", NO_NUM); + kinfo.data_size = ((kinfo.data_size+CLICK_SIZE-1)/CLICK_SIZE) * CLICK_SIZE; + /* Build gdt and idt pointers in GDT where the BIOS expects them. */ dtp= (struct desctableptr_s *) &gdt[GDT_INDEX]; * (u16_t *) dtp->limit = (sizeof gdt) - 1; @@ -334,3 +339,118 @@ PUBLIC void alloc_segments(register struct proc *rp) rp->p_reg.ds = (DS_LDT_INDEX*DESC_SIZE) | TI | privilege; } +/*===========================================================================* + * printseg * + *===========================================================================*/ +PUBLIC void printseg(char *banner, int iscs, struct proc *pr, u32_t selector) +{ + u32_t base, limit, index, dpl; + struct segdesc_s *desc; + + if(banner) { kprintf("%s", banner); } + + index = selector >> 3; + + kprintf("RPL %d, ind %d of ", + (selector & RPL_MASK), index); + + if(selector & TI) { + kprintf("LDT"); + if(index < 0 || index >= LDT_SIZE) { + kprintf("invalid index in ldt\n"); + return; + } + desc = &pr->p_seg.p_ldt[index]; + } else { + kprintf("GDT"); + if(index < 0 || index >= GDT_SIZE) { + kprintf("invalid index in gdt\n"); + return; + } + desc = &gdt[index]; + } + + limit = desc->limit_low | + (((u32_t) desc->granularity & LIMIT_HIGH) << GRANULARITY_SHIFT); + + if(desc->granularity & GRANULAR) { + limit = (limit << PAGE_GRAN_SHIFT) + 0xfff; + } + + base = desc->base_low | + ((u32_t) desc->base_middle << BASE_MIDDLE_SHIFT) | + 
((u32_t) desc->base_high << BASE_HIGH_SHIFT); + + kprintf(" -> base 0x%08lx size 0x%08lx ", base, limit+1); + + if(iscs) { + if(!(desc->granularity & BIG)) + kprintf("16bit "); + } else { + if(!(desc->granularity & BIG)) + kprintf("not big "); + } + + if(desc->granularity & 0x20) { /* reserved */ + minix_panic("granularity reserved field set", NO_NUM); + } + + if(!(desc->access & PRESENT)) + kprintf("notpresent "); + + if(!(desc->access & SEGMENT)) + kprintf("system "); + + if(desc->access & EXECUTABLE) { + kprintf(" exec "); + if(desc->access & CONFORMING) kprintf("conforming "); + if(!(desc->access & READABLE)) kprintf("non-readable "); + } else { + kprintf("nonexec "); + if(desc->access & EXPAND_DOWN) kprintf("non-expand-down "); + if(!(desc->access & WRITEABLE)) kprintf("non-writable "); + } + + if(!(desc->access & ACCESSED)) { + kprintf("nonacc "); + } + + dpl = ((u32_t) desc->access & DPL) >> DPL_SHIFT; + + kprintf("DPL %d\n", dpl); + + return; +} + +/*===========================================================================* + * prot_set_kern_seg_limit * + *===========================================================================*/ +PUBLIC int prot_set_kern_seg_limit(vir_bytes limit) +{ + struct proc *rp; + vir_bytes prev; + int orig_click; + int incr_clicks; + + if(limit <= kinfo.data_base) { + kprintf("prot_set_kern_seg_limit: limit bogus\n"); + return EINVAL; + } + + /* Do actual increase. */ + orig_click = kinfo.data_size / CLICK_SIZE; + kinfo.data_size = limit - kinfo.data_base; + incr_clicks = kinfo.data_size / CLICK_SIZE - orig_click; + + prot_init(); + + /* Increase kernel processes too. 
*/ + for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { + if (RTS_ISSET(rp, SLOT_FREE) || !iskernelp(rp)) + continue; + rp->p_memmap[S].mem_len += incr_clicks; + alloc_segments(rp); + } + + return OK; +} diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 8b18f0c91..5ab71ea57 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -49,11 +49,17 @@ _PROTOTYPE( void vir_insb, (u16_t port, struct proc *proc, u32_t vir, size_t cou _PROTOTYPE( void vir_outsb, (u16_t port, struct proc *proc, u32_t vir, size_t count)); _PROTOTYPE( void vir_insw, (u16_t port, struct proc *proc, u32_t vir, size_t count)); _PROTOTYPE( void vir_outsw, (u16_t port, struct proc *proc, u32_t vir, size_t count)); +_PROTOTYPE( void i386_updatepde, (int pde, u32_t val)); +_PROTOTYPE( void i386_freepde, (int pde)); +_PROTOTYPE( void getcr3val, (void)); +_PROTOTYPE( void switchedcr3, (void)); +_PROTOTYPE( void vm_set_cr3, (struct proc *)); /* exception.c */ _PROTOTYPE( void exception, (unsigned vec_nr, u32_t trap_errno, - u32_t old_eip, U16_t old_cs, u32_t old_eflags) ); + u32_t old_eip, U16_t old_cs, u32_t old_eflags, + u32_t *old_eip_ptr, u32_t *old_eax_ptr, u32_t pagefaultcr2) ); /* klib386.s */ _PROTOTYPE( void level0, (void (*func)(void)) ); @@ -70,7 +76,12 @@ _PROTOTYPE( void phys_insb, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_insw, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_outsb, (U16_t port, phys_bytes buf, size_t count) ); _PROTOTYPE( void phys_outsw, (U16_t port, phys_bytes buf, size_t count) ); -_PROTOTYPE( void i386_invlpg, (U32_t addr) ); +_PROTOTYPE( void i386_invlpg_level0, (void) ); +_PROTOTYPE( int _memcpy_k, (void *dst, void *src, size_t n) ); +_PROTOTYPE( int _memcpy_k_fault, (void) ); +_PROTOTYPE( u32_t read_cr3, (void) ); +_PROTOTYPE( void reload_cr3, (void) ); +_PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes) ); /* protect.c */ _PROTOTYPE( void prot_init, (void) ); 
@@ -79,6 +90,8 @@ _PROTOTYPE( void init_codeseg, (struct segdesc_s *segdp, phys_bytes base, _PROTOTYPE( void init_dataseg, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size, int privilege) ); _PROTOTYPE( void enable_iop, (struct proc *pp) ); +_PROTOTYPE( int prot_set_kern_seg_limit, (vir_bytes limit) ); +_PROTOTYPE( void printseg, (char *banner, int iscs, struct proc *pr, u32_t selector) ); /* prototype of an interrupt vector table entry */ struct gate_table_s { diff --git a/kernel/arch/i386/sha1.h b/kernel/arch/i386/sha1.h new file mode 100644 index 000000000..dbfdff356 --- /dev/null +++ b/kernel/arch/i386/sha1.h @@ -0,0 +1,551 @@ +/* sha1.c : Implementation of the Secure Hash Algorithm */ + +/* SHA: NIST's Secure Hash Algorithm */ + +/* This version written November 2000 by David Ireland of + DI Management Services Pty Limited + + Adapted from code in the Python Cryptography Toolkit, + version 1.0.0 by A.M. Kuchling 1995. +*/ + +/* AM Kuchling's posting:- + Based on SHA code originally posted to sci.crypt by Peter Gutmann + in message <30ajo5$oe8@ccu2.auckland.ac.nz>. + Modified to test for endianness on creation of SHA objects by AMK. + Also, the original specification of SHA was found to have a weakness + by NSA/NIST. This code implements the fixed version of SHA. +*/ + +/* Here's the first paragraph of Peter Gutmann's posting: + +The following is my SHA (FIPS 180) code updated to allow use of the "fixed" +SHA, thanks to Jim Gillogly and an anonymous contributor for the information on +what's changed in the new version. The fix is a simple change which involves +adding a single rotate in the initial expansion function. It is unknown +whether this is an optimal solution to the problem which was discovered in the +SHA or whether it's simply a bandaid which fixes the problem with a minimum of +effort (for example the reengineering of a great many Capstone chips). +*/ + +/* h files included here to make this just one file ... 
*/ + +/* global.h */ + +#ifndef _GLOBAL_H_ +#define _GLOBAL_H_ 1 + +/* POINTER defines a generic pointer type */ +typedef unsigned char *POINTER; + +/* UINT4 defines a four byte word */ +typedef unsigned long int UINT4; + +/* SHA1BYTE defines a unsigned character */ +typedef unsigned char SHA1BYTE; + +#endif /* end _GLOBAL_H_ */ + +/* sha.h */ + +#ifndef _SHA_H_ +#define _SHA_H_ 1 + +/* #include "global.h" */ + +/* The structure for storing SHS info */ + +typedef struct +{ + UINT4 digest[ 5 ]; /* Message digest */ + UINT4 countLo, countHi; /* 64-bit bit count */ + UINT4 data[ 16 ]; /* SHS data buffer */ + int Endianness; +} SHA_CTX; + +/* Message digest functions */ + +void SHAInit(SHA_CTX *); +void SHAUpdate(SHA_CTX *, SHA1BYTE *buffer, int count); +void SHAFinal(SHA1BYTE *output, SHA_CTX *); + +#endif /* end _SHA_H_ */ + +/* endian.h */ + +#ifndef _ENDIAN_H_ +#define _ENDIAN_H_ 1 + +void endianTest(int *endianness); + +#endif /* end _ENDIAN_H_ */ + + +/* sha.c */ + +#include +#include + +static void SHAtoByte(SHA1BYTE *output, UINT4 *input, unsigned int len); + +/* The SHS block size and message digest sizes, in bytes */ + +#define SHS_DATASIZE 64 +#define SHS_DIGESTSIZE 20 + + +/* The SHS f()-functions. 
The f1 and f3 functions can be optimized to + save one boolean operation each - thanks to Rich Schroeppel, + rcs@cs.arizona.edu for discovering this */ + +/*#define f1(x,y,z) ( ( x & y ) | ( ~x & z ) ) // Rounds 0-19 */ +#define f1(x,y,z) ( z ^ ( x & ( y ^ z ) ) ) /* Rounds 0-19 */ +#define f2(x,y,z) ( x ^ y ^ z ) /* Rounds 20-39 */ +/*#define f3(x,y,z) ( ( x & y ) | ( x & z ) | ( y & z ) ) // Rounds 40-59 */ +#define f3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) ) /* Rounds 40-59 */ +#define f4(x,y,z) ( x ^ y ^ z ) /* Rounds 60-79 */ + +/* The SHS Mysterious Constants */ + +#define K1 0x5A827999L /* Rounds 0-19 */ +#define K2 0x6ED9EBA1L /* Rounds 20-39 */ +#define K3 0x8F1BBCDCL /* Rounds 40-59 */ +#define K4 0xCA62C1D6L /* Rounds 60-79 */ + +/* SHS initial values */ + +#define h0init 0x67452301L +#define h1init 0xEFCDAB89L +#define h2init 0x98BADCFEL +#define h3init 0x10325476L +#define h4init 0xC3D2E1F0L + +/* Note that it may be necessary to add parentheses to these macros if they + are to be called with expressions as arguments */ +/* 32-bit rotate left - kludged with shifts */ + +#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) ) + +/* The initial expanding function. The hash function is defined over an + 80-UINT2 expanded input array W, where the first 16 are copies of the input + data, and the remaining 64 are defined by + + W[ i ] = W[ i - 16 ] ^ W[ i - 14 ] ^ W[ i - 8 ] ^ W[ i - 3 ] + + This implementation generates these values on the fly in a circular + buffer - thanks to Colin Plumb, colin@nyx10.cs.du.edu for this + optimization. + + The updated SHS changes the expanding function by adding a rotate of 1 + bit. Thanks to Jim Gillogly, jim@rand.org, and an anonymous contributor + for this information */ + +#define expand(W,i) ( W[ i & 15 ] = ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \ + W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) ) + + +/* The prototype SHS sub-round. 
The fundamental sub-round is: + + a' = e + ROTL( 5, a ) + f( b, c, d ) + k + data; + b' = a; + c' = ROTL( 30, b ); + d' = c; + e' = d; + + but this is implemented by unrolling the loop 5 times and renaming the + variables ( e, a, b, c, d ) = ( a', b', c', d', e' ) each iteration. + This code is then replicated 20 times for each of the 4 functions, using + the next 20 values from the W[] array each time */ + +#define subRound(a, b, c, d, e, f, k, data) \ + ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) ) + +/* Initialize the SHS values */ + +void SHAInit(SHA_CTX *shsInfo) +{ + endianTest(&shsInfo->Endianness); + /* Set the h-vars to their initial values */ + shsInfo->digest[ 0 ] = h0init; + shsInfo->digest[ 1 ] = h1init; + shsInfo->digest[ 2 ] = h2init; + shsInfo->digest[ 3 ] = h3init; + shsInfo->digest[ 4 ] = h4init; + + /* Initialise bit count */ + shsInfo->countLo = shsInfo->countHi = 0; +} + +/* Perform the SHS transformation. Note that this code, like MD5, seems to + break some optimizing compilers due to the complexity of the expressions + and the size of the basic block. It may be necessary to split it into + sections, e.g. based on the four subrounds + + Note that this corrupts the shsInfo->data area */ + +static void SHSTransform( UINT4 *digest, UINT4 *data ) + { + UINT4 A, B, C, Dv, E; /* Local vars */ + UINT4 eData[ 16 ]; /* Expanded data */ + + /* Set up first buffer and local data buffer */ + A = digest[ 0 ]; + B = digest[ 1 ]; + C = digest[ 2 ]; + Dv = digest[ 3 ]; + E = digest[ 4 ]; + memcpy( (POINTER)eData, (POINTER)data, SHS_DATASIZE ); + + /* Heavy mangling, in 4 sub-rounds of 20 interations each. 
*/ + subRound( A, B, C, Dv, E, f1, K1, eData[ 0 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 1 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 2 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 3 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 4 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 5 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 6 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 7 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 8 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 9 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 10 ] ); + subRound( E, A, B, C, Dv, f1, K1, eData[ 11 ] ); + subRound( Dv, E, A, B, C, f1, K1, eData[ 12 ] ); + subRound( C, Dv, E, A, B, f1, K1, eData[ 13 ] ); + subRound( B, C, Dv, E, A, f1, K1, eData[ 14 ] ); + subRound( A, B, C, Dv, E, f1, K1, eData[ 15 ] ); + subRound( E, A, B, C, Dv, f1, K1, expand( eData, 16 ) ); + subRound( Dv, E, A, B, C, f1, K1, expand( eData, 17 ) ); + subRound( C, Dv, E, A, B, f1, K1, expand( eData, 18 ) ); + subRound( B, C, Dv, E, A, f1, K1, expand( eData, 19 ) ); + + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 20 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 21 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 22 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 23 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 24 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 25 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 26 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 27 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 28 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 29 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 30 ) ); + subRound( E, A, B, C, Dv, f2, K2, expand( eData, 31 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 32 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 33 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 34 ) ); + subRound( A, B, C, Dv, E, f2, K2, expand( eData, 35 ) ); + 
subRound( E, A, B, C, Dv, f2, K2, expand( eData, 36 ) ); + subRound( Dv, E, A, B, C, f2, K2, expand( eData, 37 ) ); + subRound( C, Dv, E, A, B, f2, K2, expand( eData, 38 ) ); + subRound( B, C, Dv, E, A, f2, K2, expand( eData, 39 ) ); + + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 40 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 41 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 42 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 43 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 44 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 45 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 46 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 47 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 48 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 49 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 50 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 51 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 52 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 53 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 54 ) ); + subRound( A, B, C, Dv, E, f3, K3, expand( eData, 55 ) ); + subRound( E, A, B, C, Dv, f3, K3, expand( eData, 56 ) ); + subRound( Dv, E, A, B, C, f3, K3, expand( eData, 57 ) ); + subRound( C, Dv, E, A, B, f3, K3, expand( eData, 58 ) ); + subRound( B, C, Dv, E, A, f3, K3, expand( eData, 59 ) ); + + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 60 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 61 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 62 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 63 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 64 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 65 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 66 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 67 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 68 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 
69 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 70 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 71 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 72 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 73 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 74 ) ); + subRound( A, B, C, Dv, E, f4, K4, expand( eData, 75 ) ); + subRound( E, A, B, C, Dv, f4, K4, expand( eData, 76 ) ); + subRound( Dv, E, A, B, C, f4, K4, expand( eData, 77 ) ); + subRound( C, Dv, E, A, B, f4, K4, expand( eData, 78 ) ); + subRound( B, C, Dv, E, A, f4, K4, expand( eData, 79 ) ); + + /* Build message digest */ + digest[ 0 ] += A; + digest[ 1 ] += B; + digest[ 2 ] += C; + digest[ 3 ] += Dv; + digest[ 4 ] += E; + } + +/* When run on a little-endian CPU we need to perform byte reversal on an + array of long words. */ + +static void longReverse(UINT4 *buffer, int byteCount, int Endianness ) +{ + UINT4 value; + + if (Endianness) return; + byteCount /= sizeof( UINT4 ); + while( byteCount-- ) + { + value = *buffer; + value = ( ( value & 0xFF00FF00L ) >> 8 ) | \ + ( ( value & 0x00FF00FFL ) << 8 ); + *buffer++ = ( value << 16 ) | ( value >> 16 ); + } +} + +/* Update SHS for a block of data */ + +void SHAUpdate(SHA_CTX *shsInfo, SHA1BYTE *buffer, int count) +{ + UINT4 tmp; + int dataCount; + + /* Update bitcount */ + tmp = shsInfo->countLo; + if ( ( shsInfo->countLo = tmp + ( ( UINT4 ) count << 3 ) ) < tmp ) + shsInfo->countHi++; /* Carry from low to high */ + shsInfo->countHi += count >> 29; + + /* Get count of bytes already in data */ + dataCount = ( int ) ( tmp >> 3 ) & 0x3F; + + /* Handle any leading odd-sized chunks */ + if( dataCount ) + { + SHA1BYTE *p = ( SHA1BYTE * ) shsInfo->data + dataCount; + + dataCount = SHS_DATASIZE - dataCount; + if( count < dataCount ) + { + memcpy( p, buffer, count ); + return; + } + memcpy( p, buffer, dataCount ); + longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness); + SHSTransform( shsInfo->digest, shsInfo->data ); 
+    buffer += dataCount;
+    count -= dataCount;
+    }
+
+    /* Process data in SHS_DATASIZE chunks */
+    while( count >= SHS_DATASIZE )
+    {
+        memcpy( (POINTER)shsInfo->data, (POINTER)buffer, SHS_DATASIZE );
+        longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness );
+        SHSTransform( shsInfo->digest, shsInfo->data );
+        buffer += SHS_DATASIZE;
+        count -= SHS_DATASIZE;
+    }
+
+    /* Handle any remaining bytes of data. */
+    memcpy( (POINTER)shsInfo->data, (POINTER)buffer, count );
+    }
+
+/* Final wrapup - pad to SHS_DATASIZE-byte boundary with the bit pattern
+   1 0* (64-bit count of bits processed, MSB-first) */
+
+void SHAFinal(SHA1BYTE *output, SHA_CTX *shsInfo)
+{
+    int count;
+    SHA1BYTE *dataPtr;
+
+    /* Compute number of bytes mod 64 */
+    count = ( int ) shsInfo->countLo;
+    count = ( count >> 3 ) & 0x3F;
+
+    /* Set the first char of padding to 0x80. This is safe since there is
+       always at least one byte free */
+    dataPtr = ( SHA1BYTE * ) shsInfo->data + count;
+    *dataPtr++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = SHS_DATASIZE - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if( count < 8 )
+    {
+        /* Two lots of padding: Pad the first block to 64 bytes */
+        memset( dataPtr, 0, count );
+        longReverse( shsInfo->data, SHS_DATASIZE, shsInfo->Endianness );
+        SHSTransform( shsInfo->digest, shsInfo->data );
+
+        /* Now fill the next block with 56 bytes */
+        memset( (POINTER)shsInfo->data, 0, SHS_DATASIZE - 8 );
+    }
+    else
+        /* Pad block to 56 bytes */
+        memset( dataPtr, 0, count - 8 );
+
+    /* Append length in bits and transform */
+    shsInfo->data[ 14 ] = shsInfo->countHi;
+    shsInfo->data[ 15 ] = shsInfo->countLo;
+
+    longReverse( shsInfo->data, SHS_DATASIZE - 8, shsInfo->Endianness );
+    SHSTransform( shsInfo->digest, shsInfo->data );
+
+    /* Output to an array of bytes */
+    SHAtoByte(output, shsInfo->digest, SHS_DIGESTSIZE);
+
+    /* Zeroise sensitive stuff; sizeof(*shsInfo) wipes the whole context,
+       not just sizeof(pointer) bytes as the old sizeof(shsInfo) did */
+    memset((POINTER)shsInfo, 0, sizeof(*shsInfo));
+}
+
+static void SHAtoByte(SHA1BYTE 
*input, unsigned int len) +{ /* Output SHA digest in byte array */ + unsigned int i, j; + + for(i = 0, j = 0; j < len; i++, j += 4) + { + output[j+3] = (SHA1BYTE)( input[i] & 0xff); + output[j+2] = (SHA1BYTE)((input[i] >> 8 ) & 0xff); + output[j+1] = (SHA1BYTE)((input[i] >> 16) & 0xff); + output[j ] = (SHA1BYTE)((input[i] >> 24) & 0xff); + } +} + + +unsigned char digest[SHS_DIGESTSIZE]; +unsigned char testmessage[3] = {'a', 'b', 'c' }; +unsigned char *mess56 = (unsigned char *) + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; + +/* Correct solutions from FIPS PUB 180-1 */ +char *dig1 = "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D"; +char *dig2 = "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1"; +char *dig3 = "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"; + +/* Output should look like:- + a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D <= correct + 84983e44 1c3bd26e baae4aa1 f95129e5 e54670f1 + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 <= correct + 34aa973c d4c4daa4 f61eeb2b dbad2731 6534016f + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F <= correct +*/ + +void sha1test(void) +{ + SHA_CTX sha; + int i; + SHA1BYTE big[1000]; + + SHAInit(&sha); + SHAUpdate(&sha, testmessage, 3); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + } + printf("\n"); + printf(" %s <= correct\n", dig1); + + SHAInit(&sha); + SHAUpdate(&sha, mess56, 56); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + } + printf("\n"); + printf(" %s <= correct\n", dig2); + + /* Fill up big array */ + for (i = 0; i < 1000; i++) + big[i] = 'a'; + + SHAInit(&sha); + /* Digest 1 million x 'a' */ + for (i = 0; i < 1000; i++) + SHAUpdate(&sha, big, 1000); + SHAFinal(digest, &sha); + + for (i = 0; i < SHS_DIGESTSIZE; i++) + { + if ((i % 4) == 0) printf(" "); + printf("%02x", digest[i]); + 
} + printf("\n"); + printf(" %s <= correct\n", dig3); +} + +/* endian.c */ + +void endianTest(int *endian_ness) +{ + if((*(unsigned short *) ("#S") >> 8) == '#') + { + /* printf("Big endian = no change\n"); */ + *endian_ness = !(0); + } + else + { + /* printf("Little endian = swap\n"); */ + *endian_ness = 0; + } +} + +static char * +sha1print(char *digest) +{ + int i; + for(i = 0; i < SHS_DIGESTSIZE; i++) { + printf("%02x", (unsigned char) digest[i]); + } + printf("\n"); +} + +static int +phys_sha1(unsigned long ptr, unsigned long bytes, unsigned char *digest) +{ + unsigned long addr = 0; + SHA_CTX sha; + + SHAInit(&sha); + + while(bytes > 0) { + unsigned long chunk; + static unsigned char buf[1024]; + chunk = bytes > sizeof(buf) ? sizeof(buf) : bytes; + PHYS_COPY_CATCH(ptr, vir2phys(buf), chunk, addr); + if(addr) { + return EFAULT; + } + SHAUpdate(&sha, buf, chunk); + ptr += chunk; + bytes -= chunk; + } + + SHAFinal(digest, &sha); + return OK; +} + +static void +sha1(unsigned char *ptr, unsigned long bytes, unsigned char *digest) +{ + SHA_CTX sha; + + SHAInit(&sha); + SHAUpdate(&sha, ptr, bytes); + SHAFinal(digest, &sha); + + return; +} + diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c index 805e4d451..80a7fb9a3 100644 --- a/kernel/arch/i386/system.c +++ b/kernel/arch/i386/system.c @@ -14,11 +14,11 @@ #include "proto.h" #include "../../proc.h" +#include "../../debug.h" #define CR0_EM 0x0004 /* set to enable trap on any FP instruction */ FORWARD _PROTOTYPE( void ser_debug, (int c)); -FORWARD _PROTOTYPE( void ser_dump_stats, (void)); PUBLIC void arch_shutdown(int how) { @@ -137,82 +137,143 @@ PUBLIC void do_ser_debug() ser_debug(c); } +PRIVATE void ser_dump_queues(void) +{ + int q; + for(q = 0; q < NR_SCHED_QUEUES; q++) { + struct proc *p; + if(rdy_head[q]) + printf("%2d: ", q); + for(p = rdy_head[q]; p; p = p->p_nextready) { + printf("%s / %d ", p->p_name, p->p_endpoint); + } + printf("\n"); + } + +} + +PRIVATE void ser_dump_segs(void) +{ + 
struct proc *pp; + for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) + { + if (pp->p_rts_flags & SLOT_FREE) + continue; + kprintf("%d: %s ep %d\n", proc_nr(pp), pp->p_name, pp->p_endpoint); + printseg("cs: ", 1, pp, pp->p_reg.cs); + printseg("ds: ", 0, pp, pp->p_reg.ds); + if(pp->p_reg.ss != pp->p_reg.ds) { + printseg("ss: ", 0, pp, pp->p_reg.ss); + } + } +} + PRIVATE void ser_debug(int c) { + int u = 0; + do_serial_debug++; - kprintf("ser_debug: %d\n", c); + /* Disable interrupts so that we get a consistent state. */ + if(!intr_disabled()) { lock; u = 1; }; + switch(c) { case '1': ser_dump_proc(); break; case '2': - ser_dump_stats(); + ser_dump_queues(); break; + case '3': + ser_dump_segs(); + break; +#if DEBUG_TRACE +#define TOGGLECASE(ch, flag) \ + case ch: { \ + if(verboseflags & flag) { \ + verboseflags &= ~flag; \ + printf("%s disabled\n", #flag); \ + } else { \ + verboseflags |= flag; \ + printf("%s enabled\n", #flag); \ + } \ + break; \ + } + TOGGLECASE('8', VF_SCHEDULING) + TOGGLECASE('9', VF_PICKPROC) +#endif } do_serial_debug--; + if(u) { unlock; } } +PRIVATE void printslot(struct proc *pp, int level) +{ + struct proc *depproc = NULL; + int dep = NONE; +#define COL { int i; for(i = 0; i < level; i++) printf("> "); } + + if(level >= NR_PROCS) { + kprintf("loop??\n"); + return; + } + + COL + + kprintf("%d: %s %d prio %d/%d time %d/%d cr3 0x%lx rts %s misc %s", + proc_nr(pp), pp->p_name, pp->p_endpoint, + pp->p_priority, pp->p_max_priority, pp->p_user_time, + pp->p_sys_time, pp->p_seg.p_cr3, + rtsflagstr(pp->p_rts_flags), miscflagstr(pp->p_misc_flags)); + + if(pp->p_rts_flags & SENDING) { + dep = pp->p_sendto_e; + kprintf(" to: "); + } else if(pp->p_rts_flags & RECEIVING) { + dep = pp->p_getfrom_e; + kprintf(" from: "); + } + + if(dep != NONE) { + if(dep == ANY) { + kprintf(" ANY\n"); + } else { + int procno; + if(!isokendpt(dep, &procno)) { + kprintf(" ??? 
%d\n", dep); + } else { + depproc = proc_addr(procno); + if(depproc->p_rts_flags & SLOT_FREE) { + kprintf(" empty slot %d???\n", procno); + depproc = NULL; + } else { + kprintf(" %s\n", depproc->p_name); + } + } + } + } else { + kprintf("\n"); + } + + COL + proc_stacktrace(pp); + + + if(depproc) + printslot(depproc, level+1); +} + + PUBLIC void ser_dump_proc() { struct proc *pp; - int u = 0; - - /* Disable interrupts so that we get a consistent state. */ - if(!intr_disabled()) { lock; u = 1; }; for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) { if (pp->p_rts_flags & SLOT_FREE) continue; - kprintf( - "%d: 0x%02x %s e %d src %d dst %d prio %d/%d time %d/%d EIP 0x%x\n", - proc_nr(pp), - pp->p_rts_flags, pp->p_name, - pp->p_endpoint, pp->p_getfrom_e, pp->p_sendto_e, - pp->p_priority, pp->p_max_priority, - pp->p_user_time, pp->p_sys_time, - pp->p_reg.pc); - proc_stacktrace(pp); + printslot(pp, 0); } - - if(u) { unlock; } -} - -PRIVATE void ser_dump_stats() -{ - kprintf("ipc_stats:\n"); - kprintf("deadproc: %d\n", ipc_stats.deadproc); - kprintf("bad_endpoint: %d\n", ipc_stats.bad_endpoint); - kprintf("dst_not_allowed: %d\n", ipc_stats.dst_not_allowed); - kprintf("bad_call: %d\n", ipc_stats.bad_call); - kprintf("call_not_allowed: %d\n", ipc_stats.call_not_allowed); - kprintf("bad_buffer: %d\n", ipc_stats.bad_buffer); - kprintf("deadlock: %d\n", ipc_stats.deadlock); - kprintf("not_ready: %d\n", ipc_stats.not_ready); - kprintf("src_died: %d\n", ipc_stats.src_died); - kprintf("dst_died: %d\n", ipc_stats.dst_died); - kprintf("no_priv: %d\n", ipc_stats.no_priv); - kprintf("bad_size: %d\n", ipc_stats.bad_size); - kprintf("bad_senda: %d\n", ipc_stats.bad_senda); - if (ex64hi(ipc_stats.total)) - { - kprintf("total: %x:%08x\n", ex64hi(ipc_stats.total), - ex64lo(ipc_stats.total)); - } - else - kprintf("total: %u\n", ex64lo(ipc_stats.total)); - - kprintf("sys_stats:\n"); - kprintf("bad_req: %d\n", sys_stats.bad_req); - kprintf("not_allowed: %d\n", sys_stats.not_allowed); - if 
(ex64hi(sys_stats.total)) - { - kprintf("total: %x:%08x\n", ex64hi(sys_stats.total), - ex64lo(sys_stats.total)); - } - else - kprintf("total: %u\n", ex64lo(sys_stats.total)); } #if SPROFILE diff --git a/kernel/arch/i386/vm.h b/kernel/arch/i386/vm.h deleted file mode 100644 index 1707ac990..000000000 --- a/kernel/arch/i386/vm.h +++ /dev/null @@ -1,27 +0,0 @@ - -.define _load_kernel_cr3 -.define _last_cr3 - -#define LOADKERNELCR3 ;\ - inc (_cr3switch) ;\ - mov eax, (_kernel_cr3) ;\ - cmp (_last_cr3), eax ;\ - jz 9f ;\ - push _load_kernel_cr3 ;\ - call _level0 ;\ - pop eax ;\ - mov eax, (_kernel_cr3) ;\ - mov (_last_cr3), eax ;\ - inc (_cr3reload) ;\ -9: - -#define LOADCR3WITHEAX(type, newcr3) ;\ -sseg inc (_cr3switch) ;\ -sseg mov eax, newcr3 ;\ -sseg cmp (_last_cr3), eax ;\ - jz 8f ;\ - mov cr3, eax ;\ -sseg inc (_cr3reload) ;\ -sseg mov (_last_cr3), eax ;\ -8: - diff --git a/kernel/clock.c b/kernel/clock.c index d5eb1ddfc..0be366e4b 100755 --- a/kernel/clock.c +++ b/kernel/clock.c @@ -230,25 +230,23 @@ irq_hook_t *hook; * If any of the timers expire, do_clocktick() will send out signals. */ expired = 0; - if ((proc_ptr->p_misc_flags & VIRT_TIMER) && + if ((proc_ptr->p_misc_flags & MF_VIRT_TIMER) && (proc_ptr->p_virt_left -= ticks) <= 0) expired = 1; - if ((proc_ptr->p_misc_flags & PROF_TIMER) && + if ((proc_ptr->p_misc_flags & MF_PROF_TIMER) && (proc_ptr->p_prof_left -= ticks) <= 0) expired = 1; if (! (priv(proc_ptr)->s_flags & BILLABLE) && - (bill_ptr->p_misc_flags & PROF_TIMER) && + (bill_ptr->p_misc_flags & MF_PROF_TIMER) && (bill_ptr->p_prof_left -= ticks) <= 0) expired = 1; -#if 0 /* Update load average. */ load_update(); -#endif /* Check if do_clocktick() must be called. Done for alarms and scheduling. * Some processes, such as the kernel tasks, cannot be preempted. 
*/ if ((next_timeout <= realtime) || (proc_ptr->p_ticks_left <= 0) || expired) { prev_ptr = proc_ptr; /* store running process */ - lock_notify(HARDWARE, CLOCK); /* send notification */ + mini_notify(proc_addr(HARDWARE), CLOCK); /* send notification */ } if (do_serial_debug) diff --git a/kernel/debug.c b/kernel/debug.c index 12cd8aa64..7324d4857 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -25,6 +25,8 @@ check_runqueues_f(char *file, int line) minix_panic("check_runqueues called with interrupts enabled", NO_NUM); } + FIXME("check_runqueues being done"); + #define MYPANIC(msg) { \ kprintf("check_runqueues:%s:%d: %s\n", file, line, msg); \ minix_panic("check_runqueues failed", NO_NUM); \ @@ -94,7 +96,9 @@ check_runqueues_f(char *file, int line) for (xp = BEG_PROC_ADDR; xp < END_PROC_ADDR; ++xp) { if(xp->p_magic != PMAGIC) MYPANIC("p_magic wrong in proc table"); - if (! isemptyp(xp) && xp->p_ready && ! xp->p_found) { + if (isemptyp(xp)) + continue; + if(xp->p_ready && ! xp->p_found) { kprintf("sched error: ready proc %d not on queue\n", xp->p_nr); MYPANIC("ready proc not on scheduling queue"); if (l++ > MAX_LOOP) { MYPANIC("loop in debug.c?"); } @@ -103,3 +107,43 @@ check_runqueues_f(char *file, int line) } #endif /* DEBUG_SCHED_CHECK */ + +PUBLIC char * +rtsflagstr(int flags) +{ + static char str[100]; + str[0] = '\0'; + +#define FLAG(n) if(flags & n) { strcat(str, #n " "); } + + FLAG(SLOT_FREE); + FLAG(NO_PRIORITY); + FLAG(SENDING); + FLAG(RECEIVING); + FLAG(SIGNALED); + FLAG(SIG_PENDING); + FLAG(P_STOP); + FLAG(NO_PRIV); + FLAG(NO_ENDPOINT); + FLAG(VMINHIBIT); + FLAG(PAGEFAULT); + FLAG(VMREQUEST); + FLAG(VMREQTARGET); + + return str; +} + +PUBLIC char * +miscflagstr(int flags) +{ + static char str[100]; + str[0] = '\0'; + + FLAG(MF_REPLY_PEND); + FLAG(MF_ASYNMSG); + FLAG(MF_FULLVM); + FLAG(MF_DELIVERMSG); + + return str; +} + diff --git a/kernel/debug.h b/kernel/debug.h index 283b00be2..e25605e0c 100644 --- a/kernel/debug.h +++ b/kernel/debug.h @@ -8,6 +8,7 
@@ */ #include +#include #include "config.h" /* Enable prints such as @@ -24,7 +25,46 @@ #define DEBUG_TIME_LOCKS 1 /* Runtime sanity checking. */ -#define DEBUG_VMASSERT 1 +#define DEBUG_VMASSERT 0 #define DEBUG_SCHED_CHECK 0 +#define DEBUG_STACK_CHECK 0 +#define DEBUG_TRACE 0 + +#if DEBUG_TRACE + +#define VF_SCHEDULING (1L << 1) +#define VF_PICKPROC (1L << 2) + +#define TRACE(code, statement) if(verboseflags & code) { printf("%s:%d: ", __FILE__, __LINE__); statement } + +#else +#define TRACE(code, statement) +#endif + +#define NOREC_ENTER(varname) \ + static int varname = 0; \ + int mustunlock = 0; \ + if(!intr_disabled()) { lock; mustunlock = 1; } \ + if(varname) { \ + minix_panic(#varname " recursive enter", __LINE__); \ + } \ + varname = 1; + +#define NOREC_RETURN(varname, v) do { \ + if(!varname) \ + minix_panic(#varname " flag off", __LINE__); \ + if(!intr_disabled()) \ + minix_panic(#varname " interrupts on", __LINE__); \ + varname = 0; \ + if(mustunlock) { unlock; } \ + return v; \ + } while(0) + +#if DEBUG_VMASSERT +#define vmassert(t) { \ + if(!(t)) { minix_panic("vm: assert " #t " failed\n", __LINE__); } } +#else +#define vmassert(t) { } +#endif #endif /* DEBUG_H */ diff --git a/kernel/glo.h b/kernel/glo.h index e3ed5735b..208818353 100755 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -16,6 +16,7 @@ #include #include #include "config.h" +#include "debug.h" /* Variables relating to shutting down MINIX. */ EXTERN char kernel_exception; /* TRUE after system exceptions */ @@ -29,14 +30,13 @@ EXTERN struct k_randomness krandom; /* gather kernel random information */ EXTERN struct loadinfo kloadinfo; /* status of load average */ /* Process scheduling information and the kernel reentry count. 
*/ -EXTERN struct proc *prev_ptr; /* previously running process */ EXTERN struct proc *proc_ptr; /* pointer to currently running process */ EXTERN struct proc *next_ptr; /* next process to run after restart() */ +EXTERN struct proc *prev_ptr; EXTERN struct proc *bill_ptr; /* process to bill for clock ticks */ EXTERN struct proc *vmrestart; /* first process on vmrestart queue */ EXTERN struct proc *vmrequest; /* first process on vmrequest queue */ EXTERN struct proc *pagefaults; /* first process on pagefault queue */ -EXTERN struct proc *softnotify; /* first process on softnotify queue */ EXTERN char k_reenter; /* kernel reentry count (entry count less 1) */ EXTERN unsigned lost_ticks; /* clock ticks counted outside clock task */ @@ -47,32 +47,6 @@ EXTERN int irq_actids[NR_IRQ_VECTORS]; /* IRQ ID bits active */ EXTERN int irq_use; /* map of all in-use irq's */ EXTERN u32_t system_hz; /* HZ value */ -EXTERN struct ipc_stats -{ - unsigned long deadproc; - unsigned long bad_endpoint; - unsigned long dst_not_allowed; - unsigned long bad_call; - unsigned long call_not_allowed; - unsigned long bad_buffer; - unsigned long deadlock; - unsigned long not_ready; - unsigned long src_died; - unsigned long dst_died; - unsigned long no_priv; - unsigned long bad_size; - unsigned long bad_senda; - u64_t total; -} ipc_stats; -extern endpoint_t ipc_stats_target; - -EXTERN struct system_stats -{ - unsigned long bad_req; - unsigned long not_allowed; - u64_t total; -} sys_stats; - /* Miscellaneous. 
*/ EXTERN reg_t mon_ss, mon_sp; /* boot monitor stack */ EXTERN int mon_return; /* true if we can return to monitor */ @@ -85,18 +59,14 @@ EXTERN char params_buffer[512]; /* boot monitor parameters */ EXTERN int minix_panicing; EXTERN int locklevel; -EXTERN unsigned long cr3switch; -EXTERN unsigned long cr3reload; +#if DEBUG_TRACE +EXTERN int verboseflags; +#endif /* VM */ -EXTERN phys_bytes vm_base; -EXTERN phys_bytes vm_size; -EXTERN phys_bytes vm_mem_high; EXTERN int vm_running; -EXTERN int must_notify_vm; - -/* Verbose flags (debugging). */ -EXTERN int verbose_vm; +EXTERN int catch_pagefaults; +EXTERN struct proc *ptproc; /* Timing */ EXTERN util_timingdata_t timingdata[TIMING_CATEGORIES]; diff --git a/kernel/main.c b/kernel/main.c index f07997cdd..b847b2bef 100755 --- a/kernel/main.c +++ b/kernel/main.c @@ -17,6 +17,7 @@ #include #include #include "proc.h" +#include "debug.h" /* Prototype declarations for PRIVATE functions. */ FORWARD _PROTOTYPE( void announce, (void)); @@ -161,6 +162,9 @@ PUBLIC void main() rp->p_reg.sp -= sizeof(reg_t); } + /* scheduling functions depend on proc_ptr pointing somewhere. */ + if(!proc_ptr) proc_ptr = rp; + /* If this process has its own page table, VM will set the * PT up and manage it. VM will signal the kernel when it has * done this; until then, don't let it run. @@ -186,8 +190,21 @@ PUBLIC void main() /* MINIX is now ready. All boot image processes are on the ready queue. * Return to the assembly code to start running the current process. */ - bill_ptr = proc_addr(IDLE); /* it has to point somewhere */ + bill_ptr = proc_addr(IDLE); /* it has to point somewhere */ announce(); /* print MINIX startup banner */ +/* Warnings for sanity checks that take time. These warnings are printed + * so it's a clear warning no full release should be done with them + * enabled. 
+ */ +#if DEBUG_SCHED_CHECK + FIXME("DEBUG_SCHED_CHECK enabled"); +#endif +#if DEBUG_VMASSERT + FIXME("DEBUG_VMASSERT enabled"); +#endif +#if DEBUG_PROC_CHECK + FIXME("PROC check enabled"); +#endif restart(); } @@ -204,6 +221,8 @@ PRIVATE void announce(void) "Copyright 2009, Vrije Universiteit, Amsterdam, The Netherlands\n", OS_RELEASE, OS_VERSION); kprintf("MINIX is open source software, see http://www.minix3.org\n"); + + FIXME("pm, vfs, etc own page table"); } /*===========================================================================* diff --git a/kernel/proc.c b/kernel/proc.c index bb3d8543f..7b9250173 100755 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -6,10 +6,7 @@ * * As well as several entry points used from the interrupt and task level: * - * lock_notify: notify a process of a system event * lock_send: send a message to a process - * lock_enqueue: put a process on one of the scheduling queues - * lock_dequeue: remove a process from the scheduling queues * * Changes: * Aug 19, 2005 rewrote scheduling code (Jorrit N. 
Herder) @@ -57,7 +54,6 @@ FORWARD _PROTOTYPE( int mini_send, (struct proc *caller_ptr, int dst_e, message *m_ptr, int flags)); FORWARD _PROTOTYPE( int mini_receive, (struct proc *caller_ptr, int src, message *m_ptr, int flags)); -FORWARD _PROTOTYPE( int mini_notify, (struct proc *caller_ptr, int dst)); FORWARD _PROTOTYPE( int mini_senda, (struct proc *caller_ptr, asynmsg_t *table, size_t size)); FORWARD _PROTOTYPE( int deadlock, (int function, @@ -67,8 +63,10 @@ FORWARD _PROTOTYPE( int try_one, (struct proc *src_ptr, struct proc *dst_ptr)); FORWARD _PROTOTYPE( void sched, (struct proc *rp, int *queue, int *front)); FORWARD _PROTOTYPE( void pick_proc, (void)); -#define BuildMess(m_ptr, src, dst_ptr) \ - (m_ptr)->m_source = proc_addr(src)->p_endpoint; \ +#define PICK_ANY 1 +#define PICK_HIGHERONLY 2 + +#define BuildNotifyMessage(m_ptr, src, dst_ptr) \ (m_ptr)->m_type = NOTIFY_FROM(src); \ (m_ptr)->NOTIFY_TIMESTAMP = get_uptime(); \ switch (src) { \ @@ -82,49 +80,88 @@ FORWARD _PROTOTYPE( void pick_proc, (void)); break; \ } -#define CopyMess(s,sp,sm,dp,dm) do { \ - vir_bytes dstlin; \ - endpoint_t e = proc_addr(s)->p_endpoint; \ - struct vir_addr src, dst; \ - int r; \ - if((dstlin = umap_local((dp), D, (vir_bytes) dm, sizeof(message))) == 0){\ - minix_panic("CopyMess: umap_local failed", __LINE__); \ - } \ - \ - if(vm_running && \ - (r=vm_checkrange((dp), (dp), dstlin, sizeof(message), 1, 0)) != OK) { \ - if(r != VMSUSPEND) \ - minix_panic("CopyMess: vm_checkrange error", __LINE__); \ - (dp)->p_vmrequest.saved.msgcopy.dst = (dp); \ - (dp)->p_vmrequest.saved.msgcopy.dst_v = (vir_bytes) dm; \ - if(data_copy((sp)->p_endpoint, \ - (vir_bytes) (sm), SYSTEM, \ - (vir_bytes) &(dp)->p_vmrequest.saved.msgcopy.msgbuf, \ - sizeof(message)) != OK) { \ - minix_panic("CopyMess: data_copy failed", __LINE__);\ - } \ - (dp)->p_vmrequest.saved.msgcopy.msgbuf.m_source = e; \ - (dp)->p_vmrequest.type = VMSTYPE_MSGCOPY; \ - } else { \ - src.proc_nr_e = (sp)->p_endpoint; \ - dst.proc_nr_e 
= (dp)->p_endpoint; \ - src.segment = dst.segment = D; \ - src.offset = (vir_bytes) (sm); \ - dst.offset = (vir_bytes) (dm); \ - if(virtual_copy(&src, &dst, sizeof(message)) != OK) { \ - kprintf("copymess: copy %d:%lx to %d:%lx failed\n",\ - (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\ - minix_panic("CopyMess: virtual_copy (1) failed", __LINE__); \ - } \ - src.proc_nr_e = SYSTEM; \ - src.offset = (vir_bytes) &e; \ - if(virtual_copy(&src, &dst, sizeof(e)) != OK) { \ - kprintf("copymess: copy %d:%lx to %d:%lx\n", \ - (sp)->p_endpoint, (sm), (dp)->p_endpoint, dm);\ - minix_panic("CopyMess: virtual_copy (2) failed", __LINE__); \ - } \ - } \ -} while(0) +/*===========================================================================* + * QueueMess * + *===========================================================================*/ +PRIVATE int QueueMess(endpoint_t ep, vir_bytes msg_lin, struct proc *dst) +{ + int k; + phys_bytes addr; + NOREC_ENTER(queuemess); + /* Queue a message from the src process (in memory) to the dst + * process (using dst process table entry). Do actual copy to + * kernel here; it's an error if the copy fails into kernel. 
+ */ + vmassert(!(dst->p_misc_flags & MF_DELIVERMSG)); + vmassert(dst->p_delivermsg_lin); + vmassert(isokendpt(ep, &k)); + +#if 0 + if(INMEMORY(dst)) { + PHYS_COPY_CATCH(msg_lin, dst->p_delivermsg_lin, + sizeof(message), addr); + if(!addr) { + PHYS_COPY_CATCH(vir2phys(&ep), dst->p_delivermsg_lin, + sizeof(ep), addr); + if(!addr) { + NOREC_RETURN(queuemess, OK); + } + } + } +#else + FIXME("in-memory process copy"); +#endif + + PHYS_COPY_CATCH(msg_lin, vir2phys(&dst->p_delivermsg), sizeof(message), addr); + if(addr) { + NOREC_RETURN(queuemess, EFAULT); + } + + dst->p_delivermsg.m_source = ep; + dst->p_misc_flags |= MF_DELIVERMSG; + + NOREC_RETURN(queuemess, OK); +} + +/*===========================================================================* + * schedcheck * + *===========================================================================*/ +PUBLIC void schedcheck(void) +{ + /* This function is called an instant before proc_ptr is + * to be scheduled again. + */ + NOREC_ENTER(schedch); + vmassert(intr_disabled()); + if(next_ptr) { + proc_ptr = next_ptr; + next_ptr = NULL; + } + vmassert(proc_ptr); + vmassert(!proc_ptr->p_rts_flags); + while(proc_ptr->p_misc_flags & MF_DELIVERMSG) { + vmassert(!next_ptr); + vmassert(!proc_ptr->p_rts_flags); + TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); + if(delivermsg(proc_ptr) == VMSUSPEND) { + vmassert(next_ptr); + TRACE(VF_SCHEDULING, printf("suspending %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); + vmassert(proc_ptr->p_rts_flags); + vmassert(next_ptr != proc_ptr); + proc_ptr = next_ptr; + vmassert(!proc_ptr->p_rts_flags); + next_ptr = NULL; + } + } + TRACE(VF_SCHEDULING, printf("starting %s / %d\n", + proc_ptr->p_name, proc_ptr->p_endpoint);); +#if DEBUG_TRACE + proc_ptr->p_schedules++; +#endif + NOREC_RETURN(schedch, ); +} /*===========================================================================* * sys_call * @@ -146,8 +183,13 @@ long bit_map; /* 
notification event set or flags */ int src_dst_p; /* Process slot number */ size_t msg_size; - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.total= add64u(ipc_stats.total, 1); +#if DEBUG_SCHED_CHECK + if(caller_ptr->p_misc_flags & MF_DELIVERMSG) { + kprintf("sys_call: MF_DELIVERMSG on for %s / %d\n", + caller_ptr->p_name, caller_ptr->p_endpoint); + minix_panic("MF_DELIVERMSG on", NO_NUM); + } +#endif #if 0 if(src_dst_e != 4 && src_dst_e != 5 && @@ -163,12 +205,10 @@ long bit_map; /* notification event set or flags */ } #endif -#if 1 +#if DEBUG_SCHED_CHECK if (RTS_ISSET(caller_ptr, SLOT_FREE)) { kprintf("called by the dead?!?\n"); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadproc++; return EINVAL; } #endif @@ -188,12 +228,10 @@ long bit_map; /* notification event set or flags */ { if (call_nr != RECEIVE) { -#if DEBUG_ENABLE_IPC_WARNINGS +#if 0 kprintf("sys_call: trap %d by %d with bad endpoint %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; return EINVAL; } src_dst_p = src_dst_e; @@ -202,12 +240,10 @@ long bit_map; /* notification event set or flags */ { /* Require a valid source and/or destination process. 
*/ if(!isokendpt(src_dst_e, &src_dst_p)) { -#if DEBUG_ENABLE_IPC_WARNINGS +#if 0 kprintf("sys_call: trap %d by %d with bad endpoint %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; return EDEADSRCDST; } @@ -221,10 +257,8 @@ long bit_map; /* notification event set or flags */ #if DEBUG_ENABLE_IPC_WARNINGS kprintf( "sys_call: ipc mask denied trap %d from %d to %d\n", - call_nr, proc_nr(caller_ptr), src_dst_p); + call_nr, caller_ptr->p_endpoint, src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_not_allowed++; return(ECALLDENIED); /* call denied by ipc mask */ } } @@ -237,8 +271,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_p); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_call++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -251,8 +283,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_p); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.call_not_allowed++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -261,8 +291,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d not allowed, caller %d, src_dst %d\n", call_nr, proc_nr(caller_ptr), src_dst_e); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.call_not_allowed++; return(ETRAPDENIED); /* trap denied by mask or kernel */ } @@ -283,61 +311,6 @@ long bit_map; /* notification event set or flags */ msg_size = sizeof(*m_ptr); } - /* If the call involves a message buffer, i.e., for SEND, SENDREC, - * or RECEIVE, check the message pointer. This check allows a message to be - * anywhere in data or stack or gap. 
It will have to be made more elaborate - * for machines which don't have the gap mapped. - * - * We use msg_size decided above. - */ - if (call_nr == SEND || call_nr == SENDREC || - call_nr == RECEIVE || call_nr == SENDA || call_nr == SENDNB) { - int r; - phys_bytes lin; - - /* Map to linear address. */ - if(msg_size > 0 && - (lin = umap_local(caller_ptr, D, (vir_bytes) m_ptr, msg_size)) == 0) { - kprintf("umap_local failed for %s / %d on 0x%lx size %d\n", - caller_ptr->p_name, caller_ptr->p_endpoint, - m_ptr, msg_size); - return EFAULT; - } - - /* Check if message pages in calling process are mapped. - * We don't have to check the recipient if this is a send, - * because this code will do that before its receive() starts. - * - * It is important the range is verified as _writable_, because - * the kernel will want to write to the SENDA buffer in the future, - * and those pages may not be shared between processes. - */ - - if(vm_running && msg_size > 0 && - (r=vm_checkrange(caller_ptr, caller_ptr, lin, msg_size, 1, 0)) != OK) { - if(r != VMSUSPEND) { - kprintf("SYSTEM:sys_call:vm_checkrange: err %d\n", r); - return r; - } - - /* We can't go ahead with this call. Caller is suspended - * and we have to save the state in its process struct. - */ - caller_ptr->p_vmrequest.saved.sys_call.call_nr = call_nr; - caller_ptr->p_vmrequest.saved.sys_call.m_ptr = m_ptr; - caller_ptr->p_vmrequest.saved.sys_call.src_dst_e = src_dst_e; - caller_ptr->p_vmrequest.saved.sys_call.bit_map = bit_map; - caller_ptr->p_vmrequest.type = VMSTYPE_SYS_CALL; - - kprintf("SYSTEM: %s:%d: suspending call 0x%lx on ipc buffer 0x%lx length 0x%lx\n", - caller_ptr->p_name, caller_ptr->p_endpoint, call_nr, m_ptr, msg_size); - - /* vm_checkrange() will have suspended caller with VMREQUEST. */ - return OK; - } - - } - /* Check for a possible deadlock for blocking SEND(REC) and RECEIVE. 
*/ if (call_nr == SEND || call_nr == SENDREC || call_nr == RECEIVE) { if (group_size = deadlock(call_nr, caller_ptr, src_dst_p)) { @@ -345,8 +318,6 @@ long bit_map; /* notification event set or flags */ kprintf("sys_call: trap %d from %d to %d deadlocked, group size %d\n", call_nr, proc_nr(caller_ptr), src_dst_p, group_size); #endif - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadlock++; return(ELOCKED); } } @@ -362,7 +333,7 @@ long bit_map; /* notification event set or flags */ switch(call_nr) { case SENDREC: /* A flag is set so that notifications cannot interrupt SENDREC. */ - caller_ptr->p_misc_flags |= REPLY_PENDING; + caller_ptr->p_misc_flags |= MF_REPLY_PEND; /* fall through */ case SEND: result = mini_send(caller_ptr, src_dst_e, m_ptr, 0); @@ -371,11 +342,11 @@ long bit_map; /* notification event set or flags */ /* fall through for SENDREC */ case RECEIVE: if (call_nr == RECEIVE) - caller_ptr->p_misc_flags &= ~REPLY_PENDING; + caller_ptr->p_misc_flags &= ~MF_REPLY_PEND; result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0); break; case NOTIFY: - result = mini_notify(caller_ptr, src_dst_p); + result = mini_notify(caller_ptr, src_dst_e); break; case SENDNB: result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING); @@ -460,22 +431,6 @@ int src_dst; /* src or dst process */ return(0); /* not a deadlock */ } -/*===========================================================================* - * sys_call_restart * - *===========================================================================*/ -PUBLIC void sys_call_restart(caller) -struct proc *caller; -{ - int r; - kprintf("restarting sys_call code 0x%lx, " - "m_ptr 0x%lx, srcdst %d, bitmap 0x%lx, but not really\n", - caller->p_vmrequest.saved.sys_call.call_nr, - caller->p_vmrequest.saved.sys_call.m_ptr, - caller->p_vmrequest.saved.sys_call.src_dst_e, - caller->p_vmrequest.saved.sys_call.bit_map); - caller->p_reg.retreg = r; -} - 
/*===========================================================================* * mini_send * *===========================================================================*/ @@ -492,14 +447,19 @@ int flags; register struct proc *dst_ptr; register struct proc **xpp; int dst_p; + phys_bytes linaddr; + vir_bytes addr; + int r; + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr, + sizeof(message)))) { + return EFAULT; + } dst_p = _ENDPOINT_P(dst_e); dst_ptr = proc_addr(dst_p); if (RTS_ISSET(dst_ptr, NO_ENDPOINT)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_died++; return EDSTDIED; } @@ -508,18 +468,20 @@ int flags; */ if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) { /* Destination is indeed waiting for this message. */ - CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG)); + if((r=QueueMess(caller_ptr->p_endpoint, linaddr, dst_ptr)) != OK) + return r; RTS_UNSET(dst_ptr, RECEIVING); } else { if(flags & NON_BLOCKING) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.not_ready++; return(ENOTREADY); } /* Destination is not waiting. Block and dequeue caller. */ - caller_ptr->p_messbuf = m_ptr; + PHYS_COPY_CATCH(linaddr, vir2phys(&caller_ptr->p_sendmsg), + sizeof(message), addr); + + if(addr) { return EFAULT; } RTS_SET(caller_ptr, SENDING); caller_ptr->p_sendto_e = dst_e; @@ -552,6 +514,18 @@ int flags; sys_map_t *map; bitchunk_t *chunk; int i, r, src_id, src_proc_nr, src_p; + phys_bytes linaddr; + + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) m_ptr, + sizeof(message)))) { + return EFAULT; + } + + /* This is where we want our message. 
*/ + caller_ptr->p_delivermsg_lin = linaddr; + caller_ptr->p_delivermsg_vir = (vir_bytes) m_ptr; if(src_e == ANY) src_p = ANY; else @@ -559,8 +533,6 @@ int flags; okendpt(src_e, &src_p); if (RTS_ISSET(proc_addr(src_p), NO_ENDPOINT)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.src_died++; return ESRCDIED; } } @@ -573,10 +545,11 @@ int flags; if (!RTS_ISSET(caller_ptr, SENDING)) { /* Check if there are pending notifications, except for SENDREC. */ - if (! (caller_ptr->p_misc_flags & REPLY_PENDING)) { + if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) { map = &priv(caller_ptr)->s_notify_pending; for (chunk=&map->chunk[0]; chunk<&map->chunk[NR_SYS_CHUNKS]; chunk++) { + endpoint_t hisep; /* Find a pending notification from the requested source. */ if (! *chunk) continue; /* no bits in chunk */ @@ -593,8 +566,13 @@ int flags; *chunk &= ~(1 << i); /* no longer pending */ /* Found a suitable source, deliver the notification message. */ - BuildMess(&m, src_proc_nr, caller_ptr); /* assemble message */ - CopyMess(src_proc_nr, proc_addr(HARDWARE), &m, caller_ptr, m_ptr); + BuildNotifyMessage(&m, src_proc_nr, caller_ptr); /* assemble message */ + hisep = proc_addr(src_proc_nr)->p_endpoint; + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + vmassert(src_e == ANY || hisep == src_e); + if((r=QueueMess(hisep, vir2phys(&m), caller_ptr)) != OK) { + minix_panic("mini_receive: local QueueMess failed", NO_NUM); + } return(OK); /* report success */ } } @@ -603,20 +581,20 @@ int flags; xpp = &caller_ptr->p_caller_q; while (*xpp != NIL_PROC) { if (src_e == ANY || src_p == proc_nr(*xpp)) { -#if 1 +#if DEBUG_SCHED_CHECK if (RTS_ISSET(*xpp, SLOT_FREE) || RTS_ISSET(*xpp, NO_ENDPOINT)) { kprintf("%d: receive from %d; found dead %d (%s)?\n", caller_ptr->p_endpoint, src_e, (*xpp)->p_endpoint, (*xpp)->p_name); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.deadproc++; return EINVAL; } #endif /* Found acceptable message. Copy it and update status. 
*/ - CopyMess((*xpp)->p_nr, *xpp, (*xpp)->p_messbuf, caller_ptr, m_ptr); + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); + QueueMess((*xpp)->p_endpoint, + vir2phys(&(*xpp)->p_sendmsg), caller_ptr); RTS_UNSET(*xpp, SENDING); *xpp = (*xpp)->p_q_link; /* remove from queue */ return(OK); /* report success */ @@ -635,7 +613,6 @@ int flags; } else { - caller_ptr->p_messbuf = m_ptr; r= try_async(caller_ptr); } if (r == OK) @@ -648,12 +625,9 @@ int flags; */ if ( ! (flags & NON_BLOCKING)) { caller_ptr->p_getfrom_e = src_e; - caller_ptr->p_messbuf = m_ptr; RTS_SET(caller_ptr, RECEIVING); return(OK); } else { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.not_ready++; return(ENOTREADY); } } @@ -661,26 +635,41 @@ int flags; /*===========================================================================* * mini_notify * *===========================================================================*/ -PRIVATE int mini_notify(caller_ptr, dst) +PUBLIC int mini_notify(caller_ptr, dst_e) register struct proc *caller_ptr; /* sender of the notification */ -int dst; /* which process to notify */ +endpoint_t dst_e; /* which process to notify */ { - register struct proc *dst_ptr = proc_addr(dst); + register struct proc *dst_ptr; int src_id; /* source id for late delivery */ message m; /* the notification message */ + int r; + int proc_nr; + int dst_p; + + vmassert(intr_disabled()); + + if (!isokendpt(dst_e, &dst_p)) { + util_stacktrace(); + kprintf("mini_notify: bogus endpoint %d\n", dst_e); + return EDEADSRCDST; + } + + dst_ptr = proc_addr(dst_p); /* Check to see if target is blocked waiting for this message. A process * can be both sending and receiving during a SENDREC system call. */ if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) && - ! (dst_ptr->p_misc_flags & REPLY_PENDING)) { + ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) { /* Destination is indeed waiting for a message. Assemble a notification * message and deliver it. 
Copy from pseudo-source HARDWARE, since the * message is in the kernel's address space. */ - BuildMess(&m, proc_nr(caller_ptr), dst_ptr); - CopyMess(proc_nr(caller_ptr), proc_addr(HARDWARE), &m, - dst_ptr, dst_ptr->p_messbuf); + BuildNotifyMessage(&m, proc_nr(caller_ptr), dst_ptr); + vmassert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG)); + if((r=QueueMess(caller_ptr->p_endpoint, vir2phys(&m), dst_ptr)) != OK) { + minix_panic("mini_notify: local QueueMess failed", NO_NUM); + } RTS_UNSET(dst_ptr, RECEIVING); return(OK); } @@ -725,21 +714,20 @@ struct proc *caller_ptr; asynmsg_t *table; size_t size; { - int i, dst_p, done, do_notify; + int i, dst_p, done, do_notify, r; unsigned flags; struct proc *dst_ptr; struct priv *privp; message *m_ptr; asynmsg_t tabent; vir_bytes table_v = (vir_bytes) table; + vir_bytes linaddr; privp= priv(caller_ptr); if (!(privp->s_flags & SYS_PROC)) { kprintf( "mini_senda: warning caller has no privilege structure\n"); - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.no_priv++; return EPERM; } @@ -753,6 +741,13 @@ size_t size; return OK; } + if(!(linaddr = umap_local(caller_ptr, D, (vir_bytes) table, + size * sizeof(*table)))) { + printf("mini_senda: umap_local failed; 0x%lx len 0x%lx\n", + table, size * sizeof(*table)); + return EFAULT; + } + /* Limit size to something reasonable. An arbitrary choice is 16 * times the number of process table entries. 
* @@ -761,8 +756,6 @@ size_t size; */ if (size > 16*(NR_TASKS + NR_PROCS)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_size++; return EDOM; } @@ -784,8 +777,6 @@ size_t size; if (flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY) || !(flags & AMF_VALID)) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_senda++; return EINVAL; } @@ -799,9 +790,6 @@ size_t size; if (!isokendpt(tabent.dst, &dst_p)) { /* Bad destination, report the error */ - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_endpoint++; - tabent.result= EDEADSRCDST; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -815,9 +803,6 @@ size_t size; if (!may_send_to(caller_ptr, dst_p)) { /* Send denied by IPC mask */ - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_not_allowed++; - tabent.result= ECALLDENIED; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -838,9 +823,6 @@ size_t size; /* NO_ENDPOINT should be removed */ if (dst_ptr->p_rts_flags & NO_ENDPOINT) { - if (caller_ptr->p_endpoint == ipc_stats_target) - ipc_stats.dst_died++; - tabent.result= EDSTDIED; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; @@ -864,12 +846,13 @@ size_t size; m_ptr= &table[i].msg; /* Note: pointer in the * caller's address space. */ - CopyMess(caller_ptr->p_nr, caller_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + /* Copy message from sender. 
*/ + tabent.result= QueueMess(caller_ptr->p_endpoint, + linaddr + (vir_bytes) &table[i].msg - + (vir_bytes) table, dst_ptr); + if(tabent.result == OK) + RTS_UNSET(dst_ptr, RECEIVING); - RTS_UNSET(dst_ptr, RECEIVING); - - tabent.result= OK; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; A_INSERT(i, flags); @@ -892,13 +875,6 @@ size_t size; { privp->s_asyntab= (vir_bytes)table; privp->s_asynsize= size; -#if 0 - if(caller_ptr->p_endpoint > INIT_PROC_NR) { - kprintf("kernel: %s (%d) asynsend table at 0x%lx, %d\n", - caller_ptr->p_name, caller_ptr->p_endpoint, - table, size); - } -#endif } return OK; } @@ -913,7 +889,7 @@ struct proc *caller_ptr; int r; struct priv *privp; struct proc *src_ptr; - + /* Try all privilege structures */ for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) { @@ -921,13 +897,10 @@ struct proc *caller_ptr; continue; if (privp->s_asynsize == 0) continue; -#if 0 - kprintf("try_async: found asyntable for proc %d\n", - privp->s_proc_nr); -#endif src_ptr= proc_addr(privp->s_proc_nr); if (!may_send_to(src_ptr, proc_nr(caller_ptr))) continue; + vmassert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG)); r= try_one(src_ptr, caller_ptr); if (r == OK) return r; @@ -957,6 +930,7 @@ struct proc *dst_ptr; asynmsg_t tabent; vir_bytes table_v; struct proc *caller_ptr; + int r; privp= priv(src_ptr); size= privp->s_asynsize; @@ -986,8 +960,6 @@ struct proc *dst_ptr; { kprintf("try_one: bad bits in table\n"); privp->s_asynsize= 0; - if (src_ptr->p_endpoint == ipc_stats_target) - ipc_stats.bad_senda++; return EINVAL; } @@ -1015,10 +987,11 @@ struct proc *dst_ptr; m_ptr= &table_ptr[i].msg; /* Note: pointer in the * caller's address space. 
*/ - CopyMess(src_ptr->p_nr, src_ptr, m_ptr, dst_ptr, - dst_ptr->p_messbuf); + A_RETRIEVE(i, msg); + r = QueueMess(src_ptr->p_endpoint, vir2phys(&tabent.msg), + dst_ptr); - tabent.result= OK; + tabent.result= r; A_INSERT(i, result); tabent.flags= flags | AMF_DONE; A_INSERT(i, flags); @@ -1034,7 +1007,7 @@ struct proc *dst_ptr; return EAGAIN; } -/*===========================================================================* + /*===========================================================================* * lock_notify * *===========================================================================*/ PUBLIC int lock_notify(src_e, dst_e) @@ -1047,62 +1020,25 @@ int dst_e; /* (endpoint) who is to be notified */ * the first kernel entry (hardware interrupt, trap, or exception). Locking * is done by temporarily disabling interrupts. */ - int result, src, dst; + int result, src_p; - if(!isokendpt(src_e, &src) || !isokendpt(dst_e, &dst)) + vmassert(!intr_disabled()); + + if (!isokendpt(src_e, &src_p)) { + kprintf("lock_notify: bogus src: %d\n", src_e); return EDEADSRCDST; - - /* Exception or interrupt occurred, thus already locked. */ - if (k_reenter >= 0) { - result = mini_notify(proc_addr(src), dst); } - /* Call from task level, locking is required. */ - else { lock; - result = mini_notify(proc_addr(src), dst); + vmassert(intr_disabled()); + result = mini_notify(proc_addr(src_p), dst_e); + vmassert(intr_disabled()); unlock; - } + vmassert(!intr_disabled()); + return(result); } -/*===========================================================================* - * soft_notify * - *===========================================================================*/ -PUBLIC int soft_notify(dst_e) -int dst_e; /* (endpoint) who is to be notified */ -{ - int dst, u = 0; - struct proc *dstp, *sys = proc_addr(SYSTEM); - -/* Delayed interface to notify() from SYSTEM that is safe/easy to call - * from more places than notify(). 
- */ - if(!intr_disabled()) { lock; u = 1; } - - { - if(!isokendpt(dst_e, &dst)) - minix_panic("soft_notify to dead ep", dst_e); - - dstp = proc_addr(dst); - - if(!dstp->p_softnotified) { - dstp->next_soft_notify = softnotify; - softnotify = dstp; - dstp->p_softnotified = 1; - - if (RTS_ISSET(sys, RECEIVING)) { - sys->p_messbuf->m_source = SYSTEM; - RTS_UNSET(sys, RECEIVING); - } - } - } - - if(u) { unlock; } - - return OK; -} - /*===========================================================================* * enqueue * *===========================================================================*/ @@ -1117,15 +1053,19 @@ register struct proc *rp; /* this process is now runnable */ int q; /* scheduling queue to use */ int front; /* add to front or back */ + NOREC_ENTER(enqueuefunc); + #if DEBUG_SCHED_CHECK if(!intr_disabled()) { minix_panic("enqueue with interrupts enabled", NO_NUM); } - CHECK_RUNQUEUES; if (rp->p_ready) minix_panic("enqueue already ready process", NO_NUM); #endif /* Determine where to insert to process. */ sched(rp, &q, &front); + vmassert(q >= 0); + vmassert(q < IDLE_Q || rp->p_endpoint == IDLE); + /* Now add the process to the queue. */ if (rdy_head[q] == NIL_PROC) { /* add to empty queue */ rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */ @@ -1141,19 +1081,25 @@ register struct proc *rp; /* this process is now runnable */ rp->p_nextready = NIL_PROC; /* mark new end */ } - /* Now select the next process to run, if there isn't a current - * process yet or current process isn't ready any more, or - * it's PREEMPTIBLE. - */ - if(!proc_ptr || proc_ptr->p_rts_flags || - (priv(proc_ptr)->s_flags & PREEMPTIBLE)) { - pick_proc(); - } - #if DEBUG_SCHED_CHECK rp->p_ready = 1; CHECK_RUNQUEUES; #endif + + /* Now select the next process to run, if there isn't a current + * process yet or current process isn't ready any more, or + * it's PREEMPTIBLE. 
+ */ + vmassert(proc_ptr); + if((proc_ptr->p_priority > rp->p_priority) && + (priv(proc_ptr)->s_flags & PREEMPTIBLE)) + pick_proc(); + +#if DEBUG_SCHED_CHECK + CHECK_RUNQUEUES; +#endif + + NOREC_RETURN(enqueuefunc, ); } /*===========================================================================* @@ -1170,14 +1116,17 @@ register struct proc *rp; /* this process is no longer runnable */ register struct proc **xpp; /* iterate over queue */ register struct proc *prev_xp; + NOREC_ENTER(dequeuefunc); + +#if DEBUG_STACK_CHECK /* Side-effect for kernel: check if the task's stack still is ok? */ if (iskernelp(rp)) { if (*priv(rp)->s_stack_guard != STACK_GUARD) minix_panic("stack overrun by task", proc_nr(rp)); } +#endif #if DEBUG_SCHED_CHECK - CHECK_RUNQUEUES; if(!intr_disabled()) { minix_panic("dequeue with interrupts enabled", NO_NUM); } if (! rp->p_ready) minix_panic("dequeue() already unready process", NO_NUM); #endif @@ -1193,17 +1142,23 @@ register struct proc *rp; /* this process is no longer runnable */ *xpp = (*xpp)->p_nextready; /* replace with next chain */ if (rp == rdy_tail[q]) /* queue tail removed */ rdy_tail[q] = prev_xp; /* set new tail */ + +#if DEBUG_SCHED_CHECK + rp->p_ready = 0; + CHECK_RUNQUEUES; +#endif if (rp == proc_ptr || rp == next_ptr) /* active process removed */ - pick_proc(); /* pick new process to run */ + pick_proc(); /* pick new process to run */ break; } prev_xp = *xpp; /* save previous in chain */ } #if DEBUG_SCHED_CHECK - rp->p_ready = 0; CHECK_RUNQUEUES; #endif + + NOREC_RETURN(dequeuefunc, ); } /*===========================================================================* @@ -1249,25 +1204,29 @@ PRIVATE void pick_proc() * clock task can tell who to bill for system time. */ register struct proc *rp; /* process to run */ - int q; /* iterate over queues */ + int q; /* iterate over queues */ + + NOREC_ENTER(pick); /* Check each of the scheduling queues for ready processes. 
The number of * queues is defined in proc.h, and priorities are set in the task table. * The lowest queue contains IDLE, which is always ready. */ for (q=0; q < NR_SCHED_QUEUES; q++) { - if ( (rp = rdy_head[q]) != NIL_PROC) { - next_ptr = rp; /* run process 'rp' next */ -#if 0 - if(rp->p_endpoint != 4 && rp->p_endpoint != 5 && rp->p_endpoint != IDLE && rp->p_endpoint != SYSTEM) - kprintf("[run %s]", rp->p_name); -#endif - if (priv(rp)->s_flags & BILLABLE) - bill_ptr = rp; /* bill for system time */ - return; - } + int found = 0; + if(!(rp = rdy_head[q])) { + TRACE(VF_PICKPROC, printf("queue %d empty\n", q);); + continue; + } + TRACE(VF_PICKPROC, printf("found %s / %d on queue %d\n", + rp->p_name, rp->p_endpoint, q);); + next_ptr = rp; /* run process 'rp' next */ + vmassert(proc_ptr != next_ptr); + vmassert(!next_ptr->p_rts_flags); + if (priv(rp)->s_flags & BILLABLE) + bill_ptr = rp; /* bill for system time */ + NOREC_RETURN(pick, ); } - minix_panic("no ready process", NO_NUM); } /*===========================================================================* @@ -1286,9 +1245,11 @@ timer_t *tp; /* watchdog timer pointer */ clock_t next_period; /* time of next period */ int ticks_added = 0; /* total time added */ + vmassert(!intr_disabled()); + + lock; for (rp=BEG_PROC_ADDR; rpp_priority > rp->p_max_priority) { /* update priority? */ if (rp->p_rts_flags == 0) dequeue(rp); /* take off queue */ ticks_added += rp->p_quantum_size; /* do accounting */ @@ -1299,12 +1260,9 @@ timer_t *tp; /* watchdog timer pointer */ ticks_added += rp->p_quantum_size - rp->p_ticks_left; rp->p_ticks_left = rp->p_quantum_size; /* give new quantum */ } - unlock; } } -#if DEBUG - kprintf("ticks_added: %d\n", ticks_added); -#endif + unlock; /* Now schedule a new watchdog timer to balance the queues again. The * period depends on the total amount of quantum ticks added. 
@@ -1328,37 +1286,6 @@ message *m_ptr; /* pointer to message buffer */ return(result); } -/*===========================================================================* - * lock_enqueue * - *===========================================================================*/ -PUBLIC void lock_enqueue(rp) -struct proc *rp; /* this process is now runnable */ -{ -/* Safe gateway to enqueue() for tasks. */ - lock; - enqueue(rp); - unlock; -} - -/*===========================================================================* - * lock_dequeue * - *===========================================================================*/ -PUBLIC void lock_dequeue(rp) -struct proc *rp; /* this process is no longer runnable */ -{ -/* Safe gateway to dequeue() for tasks. */ - if (k_reenter >= 0) { - /* We're in an exception or interrupt, so don't lock (and ... - * don't unlock). - */ - dequeue(rp); - } else { - lock; - dequeue(rp); - unlock; - } -} - /*===========================================================================* * endpoint_lookup * *===========================================================================*/ @@ -1401,24 +1328,18 @@ int *p, fatalflag; *p = _ENDPOINT_P(e); if(!isokprocn(*p)) { #if DEBUG_ENABLE_IPC_WARNINGS -#if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d out of range\n", file, line, e, *p); -#endif #endif } else if(isemptyn(*p)) { -#if DEBUG_ENABLE_IPC_WARNINGS #if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d empty\n", file, line, e, *p); -#endif #endif } else if(proc_addr(*p)->p_endpoint != e) { #if DEBUG_ENABLE_IPC_WARNINGS -#if 0 kprintf("kernel:%s:%d: bad endpoint %d: proc %d has ept %d (generation %d vs. %d)\n", file, line, e, *p, proc_addr(*p)->p_endpoint, _ENDPOINT_G(e), _ENDPOINT_G(proc_addr(*p)->p_endpoint)); -#endif #endif } else ok = 1; if(!ok && fatalflag) { diff --git a/kernel/proc.h b/kernel/proc.h index aa3752f00..ac07514b0 100755 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -10,6 +10,7 @@ * struct proc, be sure to change sconst.h to match. 
*/ #include +#include #include "const.h" #include "priv.h" @@ -39,7 +40,6 @@ struct proc { struct proc *p_nextready; /* pointer to next ready process */ struct proc *p_caller_q; /* head of list of procs wishing to send */ struct proc *p_q_link; /* link to next proc wishing to send */ - message *p_messbuf; /* pointer to passed message buffer */ int p_getfrom_e; /* from whom does process want to receive? */ int p_sendto_e; /* to whom does process want to send? */ @@ -49,6 +49,11 @@ struct proc { endpoint_t p_endpoint; /* endpoint number, generation-aware */ + message p_sendmsg; /* Message from this process if SENDING */ + message p_delivermsg; /* Message for this process if MF_DELIVERMSG */ + vir_bytes p_delivermsg_vir; /* Virtual addr this proc wants message at */ + vir_bytes p_delivermsg_lin; /* Linear addr this proc wants message at */ + /* If handler functions detect a process wants to do something with * memory that isn't present, VM has to fix it. Until it has asked * what needs to be done and fixed it, save necessary state here. @@ -60,28 +65,12 @@ struct proc { struct proc *nextrestart; /* next in vmrestart chain */ struct proc *nextrequestor; /* next in vmrequest chain */ #define VMSTYPE_SYS_NONE 0 -#define VMSTYPE_SYS_MESSAGE 1 -#define VMSTYPE_SYS_CALL 2 -#define VMSTYPE_MSGCOPY 3 +#define VMSTYPE_KERNELCALL 1 +#define VMSTYPE_DELIVERMSG 2 int type; /* suspended operation */ union { /* VMSTYPE_SYS_MESSAGE */ message reqmsg; /* suspended request message */ - - /* VMSTYPE_SYS_CALL */ - struct { - int call_nr; - message *m_ptr; - int src_dst_e; - long bit_map; - } sys_call; - - /* VMSTYPE_MSGCOPY */ - struct { - struct proc *dst; - vir_bytes dst_v; - message msgbuf; - } msgcopy; } saved; /* Parameters of request to VM */ @@ -92,10 +81,9 @@ struct proc { /* VM result when available */ int vmresult; - /* Target gets this set. (But caller and target can be - * the same, so we can't put this in the 'saved' union.) 
- */ - struct proc *requestor; +#if DEBUG_VMASSERT + char stacktrace[200]; +#endif /* If the suspended operation is a sys_call, its details are * stored here. @@ -110,21 +98,26 @@ struct proc { #define PMAGIC 0xC0FFEE1 int p_magic; /* check validity of proc pointers */ #endif + +#if DEBUG_TRACE + int p_schedules; +#endif }; /* Bits for the runtime flags. A process is runnable iff p_rts_flags == 0. */ -#define SLOT_FREE 0x01 /* process slot is free */ -#define NO_PRIORITY 0x02 /* process has been stopped */ -#define SENDING 0x04 /* process blocked trying to send */ -#define RECEIVING 0x08 /* process blocked trying to receive */ -#define SIGNALED 0x10 /* set when new kernel signal arrives */ -#define SIG_PENDING 0x20 /* unready while signal being processed */ -#define P_STOP 0x40 /* set when process is being traced */ -#define NO_PRIV 0x80 /* keep forked system process from running */ -#define NO_ENDPOINT 0x100 /* process cannot send or receive messages */ -#define VMINHIBIT 0x200 /* not scheduled until pagetable set by VM */ -#define PAGEFAULT 0x400 /* process has unhandled pagefault */ -#define VMREQUEST 0x800 /* originator of vm memory request */ +#define SLOT_FREE 0x01 /* process slot is free */ +#define NO_PRIORITY 0x02 /* process has been stopped */ +#define SENDING 0x04 /* process blocked trying to send */ +#define RECEIVING 0x08 /* process blocked trying to receive */ +#define SIGNALED 0x10 /* set when new kernel signal arrives */ +#define SIG_PENDING 0x20 /* unready while signal being processed */ +#define P_STOP 0x40 /* set when process is being traced */ +#define NO_PRIV 0x80 /* keep forked system process from running */ +#define NO_ENDPOINT 0x100 /* process cannot send or receive messages */ +#define VMINHIBIT 0x200 /* not scheduled until pagetable set by VM */ +#define PAGEFAULT 0x400 /* process has unhandled pagefault */ +#define VMREQUEST 0x800 /* originator of vm memory request */ +#define VMREQTARGET 0x1000 /* target of vm memory request */ /* These 
runtime flags can be tested and manipulated by these macros. */ @@ -134,49 +127,62 @@ struct proc { /* Set flag and dequeue if the process was runnable. */ #define RTS_SET(rp, f) \ do { \ + vmassert(intr_disabled()); \ if(!(rp)->p_rts_flags) { dequeue(rp); } \ (rp)->p_rts_flags |= (f); \ + vmassert(intr_disabled()); \ } while(0) /* Clear flag and enqueue if the process was not runnable but is now. */ #define RTS_UNSET(rp, f) \ do { \ int rts; \ - rts = (rp)->p_rts_flags; \ + vmassert(intr_disabled()); \ + rts = (rp)->p_rts_flags; \ (rp)->p_rts_flags &= ~(f); \ if(rts && !(rp)->p_rts_flags) { enqueue(rp); } \ + vmassert(intr_disabled()); \ } while(0) /* Set flag and dequeue if the process was runnable. */ #define RTS_LOCK_SET(rp, f) \ do { \ - if(!(rp)->p_rts_flags) { lock_dequeue(rp); } \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + if(!(rp)->p_rts_flags) { dequeue(rp); } \ (rp)->p_rts_flags |= (f); \ + if(u) { unlock; } \ } while(0) /* Clear flag and enqueue if the process was not runnable but is now. */ #define RTS_LOCK_UNSET(rp, f) \ do { \ int rts; \ - rts = (rp)->p_rts_flags; \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + rts = (rp)->p_rts_flags; \ (rp)->p_rts_flags &= ~(f); \ - if(rts && !(rp)->p_rts_flags) { lock_enqueue(rp); } \ + if(rts && !(rp)->p_rts_flags) { enqueue(rp); } \ + if(u) { unlock; } \ } while(0) /* Set flags to this value. 
*/ #define RTS_LOCK_SETFLAGS(rp, f) \ do { \ - if(!(rp)->p_rts_flags && (f)) { lock_dequeue(rp); } \ - (rp)->p_rts_flags = (f); \ + int u = 0; \ + if(!intr_disabled()) { u = 1; lock; } \ + if(!(rp)->p_rts_flags && (f)) { dequeue(rp); } \ + (rp)->p_rts_flags = (f); \ + if(u) { unlock; } \ } while(0) /* Misc flags */ -#define REPLY_PENDING 0x01 /* reply to IPC_REQUEST is pending */ -#define VIRT_TIMER 0x02 /* process-virtual timer is running */ -#define PROF_TIMER 0x04 /* process-virtual profile timer is running */ -#define MF_VM 0x08 /* process uses VM */ +#define MF_REPLY_PEND 0x01 /* reply to IPC_REQUEST is pending */ +#define MF_VIRT_TIMER 0x02 /* process-virtual timer is running */ +#define MF_PROF_TIMER 0x04 /* process-virtual profile timer is running */ #define MF_ASYNMSG 0x10 /* Asynchrous message pending */ #define MF_FULLVM 0x20 +#define MF_DELIVERMSG 0x40 /* Copy message for him before running */ /* Scheduling priorities for p_priority. Values must start at zero (highest * priority) and increment. 
Priorities of the processes in the boot image diff --git a/kernel/proto.h b/kernel/proto.h index 7536765c5..929950585 100755 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -33,13 +33,12 @@ _PROTOTYPE( int sys_call, (int call_nr, int src_dst, message *m_ptr, long bit_map) ); _PROTOTYPE( void sys_call_restart, (struct proc *caller) ); _PROTOTYPE( int lock_notify, (int src, int dst) ); -_PROTOTYPE( int soft_notify, (int dst) ); +_PROTOTYPE( int mini_notify, (struct proc *src, endpoint_t dst) ); _PROTOTYPE( int lock_send, (int dst, message *m_ptr) ); -_PROTOTYPE( void lock_enqueue, (struct proc *rp) ); -_PROTOTYPE( void lock_dequeue, (struct proc *rp) ); _PROTOTYPE( void enqueue, (struct proc *rp) ); _PROTOTYPE( void dequeue, (struct proc *rp) ); _PROTOTYPE( void balance_queues, (struct timer *tp) ); +_PROTOTYPE( void schedcheck, (void) ); _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep) ); #if DEBUG_ENABLE_IPC_WARNINGS _PROTOTYPE( int isokendpt_f, (char *file, int line, endpoint_t e, int *p, int f)); @@ -91,6 +90,8 @@ _PROTOTYPE( void cons_seth, (int pos, int n) ); #define CHECK_RUNQUEUES check_runqueues_f(__FILE__, __LINE__) _PROTOTYPE( void check_runqueues_f, (char *file, int line) ); #endif +_PROTOTYPE( char *rtsflagstr, (int flags) ); +_PROTOTYPE( char *miscflagstr, (int flags) ); /* system/do_safecopy.c */ _PROTOTYPE( int verify_grant, (endpoint_t, endpoint_t, cp_grant_id_t, vir_bytes, @@ -106,18 +107,21 @@ _PROTOTYPE( void stop_profile_clock, (void) ); #endif /* functions defined in architecture-dependent files. 
*/ -_PROTOTYPE( void phys_copy, (phys_bytes source, phys_bytes dest, +_PROTOTYPE( phys_bytes phys_copy, (phys_bytes source, phys_bytes dest, phys_bytes count) ); +_PROTOTYPE( void phys_copy_fault, (void)); #define virtual_copy(src, dst, bytes) virtual_copy_f(src, dst, bytes, 0) #define virtual_copy_vmcheck(src, dst, bytes) virtual_copy_f(src, dst, bytes, 1) _PROTOTYPE( int virtual_copy_f, (struct vir_addr *src, struct vir_addr *dst, vir_bytes bytes, int vmcheck) ); _PROTOTYPE( int data_copy, (endpoint_t from, vir_bytes from_addr, endpoint_t to, vir_bytes to_addr, size_t bytes)); +_PROTOTYPE( int data_copy_vmcheck, (endpoint_t from, vir_bytes from_addr, + endpoint_t to, vir_bytes to_addr, size_t bytes)); #define data_copy_to(d, p, v, n) data_copy(SYSTEM, (d), (p), (v), (n)); #define data_copy_from(d, p, v, n) data_copy((p), (v), SYSTEM, (d), (n)); _PROTOTYPE( void alloc_segments, (struct proc *rp) ); -_PROTOTYPE( void vm_init, (void) ); +_PROTOTYPE( void vm_init, (struct proc *first) ); _PROTOTYPE( void vm_map_range, (u32_t base, u32_t size, u32_t offset) ); _PROTOTYPE( int vm_copy, (vir_bytes src, struct proc *srcproc, vir_bytes dst, struct proc *dstproc, phys_bytes bytes)); @@ -130,7 +134,7 @@ _PROTOTYPE( phys_bytes umap_remote, (struct proc* rp, int seg, _PROTOTYPE( phys_bytes umap_virtual, (struct proc* rp, int seg, vir_bytes vir_addr, vir_bytes bytes) ); _PROTOTYPE( phys_bytes seg2phys, (U16_t) ); -_PROTOTYPE( void phys_memset, (phys_bytes source, unsigned long pattern, +_PROTOTYPE( int vm_phys_memset, (phys_bytes source, u8_t pattern, phys_bytes count) ); _PROTOTYPE( vir_bytes alloc_remote_segment, (u32_t *, segframe_t *, int, phys_bytes, vir_bytes, int)); @@ -164,5 +168,10 @@ _PROTOTYPE( int vm_checkrange, (struct proc *caller, struct proc *target, vir_bytes start, vir_bytes length, int writeflag, int checkonly)); _PROTOTYPE( void proc_stacktrace, (struct proc *proc) ); _PROTOTYPE( int vm_lookup, (struct proc *proc, vir_bytes virtual, vir_bytes *result, u32_t 
*ptent)); +_PROTOTYPE( int vm_suspend, (struct proc *caller, struct proc *target, + phys_bytes lin, phys_bytes size, int wrflag, int type)); +_PROTOTYPE( int delivermsg, (struct proc *target)); +_PROTOTYPE( phys_bytes arch_switch_copymsg, (struct proc *rp, message *m, + phys_bytes lin)); #endif /* PROTO_H */ diff --git a/kernel/system.c b/kernel/system.c index 992770ee7..cdc4cc656 100755 --- a/kernel/system.c +++ b/kernel/system.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,6 @@ char *callnames[NR_SYS_CALLS]; call_vec[(call_nr-KERNEL_CALL)] = (handler) FORWARD _PROTOTYPE( void initialize, (void)); -FORWARD _PROTOTYPE( void softnotify_check, (void)); FORWARD _PROTOTYPE( struct proc *vmrestart_check, (message *)); /*===========================================================================* @@ -77,26 +77,18 @@ PUBLIC void sys_task() /* Initialize the system task. */ initialize(); + while (TRUE) { struct proc *restarting; restarting = vmrestart_check(&m); - softnotify_check(); - if(softnotify) - minix_panic("softnotify non-NULL before receive (1)", NO_NUM); if(!restarting) { int r; /* Get work. Block and wait until a request message arrives. */ - if(softnotify) - minix_panic("softnotify non-NULL before receive (2)", NO_NUM); if((r=receive(ANY, &m)) != OK) minix_panic("receive() failed", r); - if(m.m_source == SYSTEM) - continue; - if(softnotify) - minix_panic("softnotify non-NULL after receive", NO_NUM); - } + } sys_call_code = (unsigned) m.m_type; call_nr = sys_call_code - KERNEL_CALL; @@ -104,37 +96,13 @@ PUBLIC void sys_task() okendpt(who_e, &who_p); caller_ptr = proc_addr(who_p); - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.total= add64u(sys_stats.total, 1); - /* See if the caller made a valid request and try to handle it. 
*/ if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */ -#if DEBUG_ENABLE_IPC_WARNINGS kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source); -#endif - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.bad_req++; result = EBADREQUEST; /* illegal message type */ } else if (!GET_BIT(priv(caller_ptr)->s_k_call_mask, call_nr)) { -#if DEBUG_ENABLE_IPC_WARNINGS - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { - kprintf("SYSTEM: request %d from %d denied.\n", - call_nr, m.m_source); - } else if (curr == limit+extra) - { - kprintf("sys_task: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; -#endif - if (caller_ptr->p_endpoint == ipc_stats_target) - sys_stats.not_allowed++; result = ECALLDENIED; /* illegal message type */ } else { @@ -146,15 +114,20 @@ PUBLIC void sys_task() * until VM tells us it's allowed. VM has been notified * and we must wait for its reply to restart the call. */ + vmassert(RTS_ISSET(caller_ptr, VMREQUEST)); + vmassert(caller_ptr->p_vmrequest.type == VMSTYPE_KERNELCALL); memcpy(&caller_ptr->p_vmrequest.saved.reqmsg, &m, sizeof(m)); - caller_ptr->p_vmrequest.type = VMSTYPE_SYS_MESSAGE; } else if (result != EDONTREPLY) { /* Send a reply, unless inhibited by a handler function. * Use the kernel function lock_send() to prevent a system * call trap. 
*/ - if(restarting) - RTS_LOCK_UNSET(restarting, VMREQUEST); + if(restarting) { + vmassert(!RTS_ISSET(restarting, VMREQUEST)); +#if 0 + vmassert(!RTS_ISSET(restarting, VMREQTARGET)); +#endif + } m.m_type = result; /* report status of call */ if(WILLRECEIVE(caller_ptr, SYSTEM)) { if (OK != (s=lock_send(m.m_source, &m))) { @@ -222,7 +195,6 @@ PRIVATE void initialize(void) map(SYS_NEWMAP, do_newmap); /* set up a process memory map */ map(SYS_SEGCTL, do_segctl); /* add segment and get selector */ map(SYS_MEMSET, do_memset); /* write char to memory area */ - map(SYS_VM_SETBUF, do_vm_setbuf); /* PM passes buffer for page tables */ map(SYS_VMCTL, do_vmctl); /* various VM process settings */ /* Copying. */ @@ -350,7 +322,11 @@ PUBLIC void send_sig(int proc_nr, int sig_nr) rp = proc_addr(proc_nr); sigaddset(&priv(rp)->s_sig_pending, sig_nr); - soft_notify(rp->p_endpoint); + if(!intr_disabled()) { + lock_notify(SYSTEM, rp->p_endpoint); + } else { + mini_notify(proc_addr(SYSTEM), rp->p_endpoint); + } } /*===========================================================================* @@ -467,7 +443,9 @@ register struct proc *rc; /* slot of process to clean up */ if(isemptyp(rc)) minix_panic("clear_proc: empty process", rc->p_endpoint); - if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR) { + if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR || + rc->p_endpoint == VM_PROC_NR) + { /* This test is great for debugging system processes dying, * but as this happens normally on reboot, not good permanent code. */ @@ -543,13 +521,6 @@ register struct proc *rc; /* slot of process to clean up */ #endif } } - - /* No pending soft notifies. */ - for(np = softnotify; np; np = np->next_soft_notify) { - if(np == rc) { - minix_panic("dying proc was on next_soft_notify", np->p_endpoint); - } - } } /*===========================================================================* @@ -583,28 +554,6 @@ int access; /* does grantee want to CPF_READ or _WRITE? 
*/ return umap_virtual(proc_addr(proc_nr), D, v_offset, bytes); } -/*===========================================================================* - * softnotify_check * - *===========================================================================*/ -PRIVATE void softnotify_check(void) -{ - struct proc *np, *nextnp; - - if(!softnotify) - return; - - for(np = softnotify; np; np = nextnp) { - if(!np->p_softnotified) - minix_panic("softnotify but no p_softnotified", NO_NUM); - lock_notify(SYSTEM, np->p_endpoint); - nextnp = np->next_soft_notify; - np->next_soft_notify = NULL; - np->p_softnotified = 0; - } - - softnotify = NULL; -} - /*===========================================================================* * vmrestart_check * *===========================================================================*/ @@ -618,23 +567,18 @@ PRIVATE struct proc *vmrestart_check(message *m) if(!(restarting = vmrestart)) return NULL; - if(restarting->p_rts_flags & SLOT_FREE) - minix_panic("SYSTEM: VMREQUEST set for empty process", NO_NUM); + vmassert(!RTS_ISSET(restarting, SLOT_FREE)); + vmassert(RTS_ISSET(restarting, VMREQUEST)); type = restarting->p_vmrequest.type; restarting->p_vmrequest.type = VMSTYPE_SYS_NONE; vmrestart = restarting->p_vmrequest.nextrestart; - if(!RTS_ISSET(restarting, VMREQUEST)) - minix_panic("SYSTEM: VMREQUEST not set for process on vmrestart queue", - restarting->p_endpoint); - switch(type) { - case VMSTYPE_SYS_MESSAGE: + case VMSTYPE_KERNELCALL: memcpy(m, &restarting->p_vmrequest.saved.reqmsg, sizeof(*m)); - if(m->m_source != restarting->p_endpoint) - minix_panic("SYSTEM: vmrestart source doesn't match", - NO_NUM); + restarting->p_vmrequest.saved.reqmsg.m_source = NONE; + vmassert(m->m_source == restarting->p_endpoint); /* Original caller could've disappeared in the meantime. 
*/ if(!isokendpt(m->m_source, &who_p)) { kprintf("SYSTEM: ignoring call %d from dead %d\n", @@ -653,26 +597,6 @@ PRIVATE struct proc *vmrestart_check(message *m) } } return restarting; - case VMSTYPE_SYS_CALL: - kprintf("SYSTEM: restart sys_call\n"); - /* Restarting a kernel trap. */ - sys_call_restart(restarting); - - /* Handled; restart system loop. */ - return NULL; - case VMSTYPE_MSGCOPY: - /* Do delayed message copy. */ - if((r=data_copy(SYSTEM, - (vir_bytes) &restarting->p_vmrequest.saved.msgcopy.msgbuf, - restarting->p_vmrequest.saved.msgcopy.dst->p_endpoint, - (vir_bytes) restarting->p_vmrequest.saved.msgcopy.dst_v, - sizeof(message))) != OK) { - minix_panic("SYSTEM: delayed msgcopy failed", r); - } - RTS_LOCK_UNSET(restarting, VMREQUEST); - - /* Handled; restart system loop. */ - return NULL; default: minix_panic("strange restart type", type); } diff --git a/kernel/system.h b/kernel/system.h index 14f55df3c..d35c7a474 100644 --- a/kernel/system.h +++ b/kernel/system.h @@ -91,9 +91,6 @@ _PROTOTYPE( int do_memset, (message *m_ptr) ); #define do_memset do_unused #endif -_PROTOTYPE( int do_vm_setbuf, (message *m_ptr) ); -_PROTOTYPE( int do_vm_map, (message *m_ptr) ); - _PROTOTYPE( int do_abort, (message *m_ptr) ); #if ! 
USE_ABORT #define do_abort do_unused diff --git a/kernel/system/Makefile b/kernel/system/Makefile index 496663d1d..a93e64f53 100644 --- a/kernel/system/Makefile +++ b/kernel/system/Makefile @@ -52,7 +52,6 @@ OBJECTS = \ $(SYSTEM)(do_sigreturn.o) \ $(SYSTEM)(do_abort.o) \ $(SYSTEM)(do_getinfo.o) \ - $(SYSTEM)(do_vm_setbuf.o) \ $(SYSTEM)(do_sprofile.o) \ $(SYSTEM)(do_cprofile.o) \ $(SYSTEM)(do_profbuf.o) \ @@ -166,9 +165,6 @@ $(SYSTEM)(do_vm.o): do_vm.o do_vm.o: do_vm.c $(CC) do_vm.c -$(SYSTEM)(do_vm_setbuf.o): do_vm_setbuf.c - $(CC) do_vm_setbuf.c - $(SYSTEM)(do_sprofile.o): do_sprofile.c $(CC) do_sprofile.c diff --git a/kernel/system/do_devio.c b/kernel/system/do_devio.c index ee7e0a912..65834be38 100644 --- a/kernel/system/do_devio.c +++ b/kernel/system/do_devio.c @@ -63,19 +63,8 @@ register message *m_ptr; /* pointer to request message */ } if (i >= nr_io_range) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf("do_devio: port 0x%x (size %d) not allowed\n", m_ptr->DIO_PORT, size); - } else if (curr == limit+extra) - { - kprintf("do_devio: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return EPERM; } } @@ -83,19 +72,8 @@ register message *m_ptr; /* pointer to request message */ doit: if (m_ptr->DIO_PORT & (size-1)) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf("do_devio: unaligned port 0x%x (size %d)\n", m_ptr->DIO_PORT, size); - } else if (curr == limit+extra) - { - kprintf("do_devio: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return EPERM; } diff --git a/kernel/system/do_exec.c b/kernel/system/do_exec.c index c37eeb7c7..a608c21cf 100644 --- a/kernel/system/do_exec.c +++ b/kernel/system/do_exec.c @@ -31,6 +31,11 @@ register message *m_ptr; /* pointer to request message */ rp = proc_addr(proc_nr); + if(rp->p_misc_flags & MF_DELIVERMSG) { + rp->p_misc_flags &= ~MF_DELIVERMSG; + 
rp->p_delivermsg_lin = 0; + } + /* Save command name for debugging, ps(1) output, etc. */ if(data_copy(who_e, (vir_bytes) m_ptr->PR_NAME_PTR, SYSTEM, (vir_bytes) rp->p_name, (phys_bytes) P_NAME_LEN - 1) != OK) diff --git a/kernel/system/do_fork.c b/kernel/system/do_fork.c index 2e5ee9135..aa94a3418 100644 --- a/kernel/system/do_fork.c +++ b/kernel/system/do_fork.c @@ -9,6 +9,7 @@ */ #include "../system.h" +#include "../vm.h" #include #include @@ -33,10 +34,25 @@ register message *m_ptr; /* pointer to request message */ if(!isokendpt(m_ptr->PR_ENDPT, &p_proc)) return EINVAL; + rpp = proc_addr(p_proc); rpc = proc_addr(m_ptr->PR_SLOT); if (isemptyp(rpp) || ! isemptyp(rpc)) return(EINVAL); + vmassert(!(rpp->p_misc_flags & MF_DELIVERMSG)); + + /* needs to be receiving so we know where the message buffer is */ + if(!RTS_ISSET(rpp, RECEIVING)) { + printf("kernel: fork not done synchronously?\n"); + return EINVAL; + } + + /* memory becomes readonly */ + if (priv(rpp)->s_asynsize > 0) { + printf("kernel: process with waiting asynsend table can't fork\n"); + return EINVAL; + } + map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR; /* Copy parent 'proc' struct to child. And reinitialize some fields. */ @@ -59,7 +75,7 @@ register message *m_ptr; /* pointer to request message */ rpc->p_reg.psw &= ~TRACEBIT; /* clear trace bit */ - rpc->p_misc_flags &= ~(VIRT_TIMER | PROF_TIMER); + rpc->p_misc_flags &= ~(MF_VIRT_TIMER | MF_PROF_TIMER); rpc->p_virt_left = 0; /* disable, clear the process-virtual timers */ rpc->p_prof_left = 0; @@ -81,9 +97,11 @@ register message *m_ptr; /* pointer to request message */ /* Calculate endpoint identifier, so caller knows what it is. */ m_ptr->PR_ENDPT = rpc->p_endpoint; + m_ptr->PR_FORK_MSGADDR = (char *) rpp->p_delivermsg_vir; /* Install new map */ r = newmap(rpc, map_ptr); + FIXLINMSG(rpc); /* Don't schedule process in VM mode until it has a new pagetable. 
*/ if(m_ptr->PR_FORK_FLAGS & PFF_VMINHIBIT) { diff --git a/kernel/system/do_getinfo.c b/kernel/system/do_getinfo.c index e40889e1c..ecafdc2cb 100644 --- a/kernel/system/do_getinfo.c +++ b/kernel/system/do_getinfo.c @@ -28,9 +28,8 @@ register message *m_ptr; /* pointer to request message */ */ size_t length; vir_bytes src_vir; - int proc_nr, nr_e, nr; + int proc_nr, nr_e, nr, r; struct proc *caller; - phys_bytes ph; int wipe_rnd_bin = -1; caller = proc_addr(who_p); @@ -67,19 +66,6 @@ register message *m_ptr; /* pointer to request message */ src_vir = (vir_bytes) irq_hooks; break; } - case GET_SCHEDINFO: { - /* This is slightly complicated because we need two data structures - * at once, otherwise the scheduling information may be incorrect. - * Copy the queue heads and fall through to copy the process table. - */ - if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR2,length)) == 0) - return EFAULT; - length = sizeof(struct proc *) * NR_SCHED_QUEUES; - CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, length, 1); - data_copy(SYSTEM, (vir_bytes) rdy_head, - who_e, (vir_bytes) m_ptr->I_VAL_PTR2, length); - /* fall through to GET_PROCTAB */ - } case GET_PROCTAB: { length = sizeof(struct proc) * (NR_PROCS + NR_TASKS); src_vir = (vir_bytes) proc; @@ -174,15 +160,16 @@ register message *m_ptr; /* pointer to request message */ /* Try to make the actual copy for the requested data. 
*/ if (m_ptr->I_VAL_LEN > 0 && length > m_ptr->I_VAL_LEN) return (E2BIG); - if((ph=umap_local(caller, D, (vir_bytes) m_ptr->I_VAL_PTR,length)) == 0) - return EFAULT; - CHECKRANGE_OR_SUSPEND(caller, ph, length, 1); - if(data_copy(SYSTEM, src_vir, who_e, (vir_bytes) m_ptr->I_VAL_PTR, length) == OK) { + r = data_copy_vmcheck(SYSTEM, src_vir, who_e, + (vir_bytes) m_ptr->I_VAL_PTR, length); + + if(r != OK) return r; + if(wipe_rnd_bin >= 0 && wipe_rnd_bin < RANDOM_SOURCES) { krandom.bin[wipe_rnd_bin].r_size = 0; krandom.bin[wipe_rnd_bin].r_next = 0; } - } + return(OK); } diff --git a/kernel/system/do_irqctl.c b/kernel/system/do_irqctl.c index 041b77b4c..bc3a43324 100644 --- a/kernel/system/do_irqctl.c +++ b/kernel/system/do_irqctl.c @@ -139,10 +139,16 @@ irq_hook_t *hook; */ int proc_nr; + vmassert(intr_disabled()); + /* As a side-effect, the interrupt handler gathers random information by * timestamping the interrupt events. This is used for /dev/random. */ +#if 0 get_randomness(&krandom, hook->irq); +#else + FIXME("get_randomness disabled"); +#endif /* Check if the handler is still alive. * If it's dead, this should never happen, as processes that die @@ -158,7 +164,8 @@ irq_hook_t *hook; priv(proc_addr(proc_nr))->s_int_pending |= (1 << hook->notify_id); /* Build notification message and return. */ - lock_notify(HARDWARE, hook->proc_nr_e); + vmassert(intr_disabled()); + mini_notify(proc_addr(HARDWARE), hook->proc_nr_e); return(hook->policy & IRQ_REENABLE); } diff --git a/kernel/system/do_memset.c b/kernel/system/do_memset.c index 511507042..1359112b6 100644 --- a/kernel/system/do_memset.c +++ b/kernel/system/do_memset.c @@ -8,6 +8,7 @@ */ #include "../system.h" +#include "../vm.h" #if USE_MEMSET @@ -18,10 +19,8 @@ PUBLIC int do_memset(m_ptr) register message *m_ptr; { /* Handle sys_memset(). This writes a pattern into the specified memory. 
*/ - unsigned long p; unsigned char c = m_ptr->MEM_PATTERN; - p = c | (c << 8) | (c << 16) | (c << 24); - phys_memset((phys_bytes) m_ptr->MEM_PTR, p, (phys_bytes) m_ptr->MEM_COUNT); + vm_phys_memset((phys_bytes) m_ptr->MEM_PTR, c, (phys_bytes) m_ptr->MEM_COUNT); return(OK); } diff --git a/kernel/system/do_safecopy.c b/kernel/system/do_safecopy.c index 55744b714..2e7d7f8ee 100644 --- a/kernel/system/do_safecopy.c +++ b/kernel/system/do_safecopy.c @@ -61,22 +61,11 @@ endpoint_t *e_granter; /* new granter (magic grants) */ if(!HASGRANTTABLE(granter_proc)) return EPERM; if(priv(granter_proc)->s_grant_entries <= grant) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { kprintf( "verify_grant: grant verify failed in ep %d proc %d: " "grant %d out of range for table size %d\n", granter, proc_nr, grant, priv(granter_proc)->s_grant_entries); - } else if (curr == limit+extra) - { - kprintf("verify_grant: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return(EPERM); } @@ -219,23 +208,9 @@ int access; /* CPF_READ for a copy from granter to grantee, CPF_WRITE /* Verify permission exists. 
*/ if((r=verify_grant(granter, grantee, grantid, bytes, access, g_offset, &v_offset, &new_granter)) != OK) { - static int curr= 0, limit= 100, extra= 20; - - if (curr < limit+extra) - { -#if 0 kprintf( "grant %d verify to copy %d->%d by %d failed: err %d\n", grantid, *src, *dst, grantee, r); -#endif - } else if (curr == limit+extra) - { - kprintf( - "do_safecopy`safecopy: no debug output for a while\n"); - } - else if (curr == 2*limit-1) - limit *= 2; - curr++; return r; } diff --git a/kernel/system/do_sigsend.c b/kernel/system/do_sigsend.c index ba340dec4..879a05ad0 100644 --- a/kernel/system/do_sigsend.c +++ b/kernel/system/do_sigsend.c @@ -29,18 +29,13 @@ message *m_ptr; /* pointer to request message */ struct sigcontext sc, *scp; struct sigframe fr, *frp; int proc_nr, r; - phys_bytes ph; if (!isokendpt(m_ptr->SIG_ENDPT, &proc_nr)) return(EINVAL); if (iskerneln(proc_nr)) return(EPERM); rp = proc_addr(proc_nr); - ph = umap_local(proc_addr(who_p), D, (vir_bytes) m_ptr->SIG_CTXT_PTR, sizeof(struct sigmsg)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(proc_addr(who_p), ph, sizeof(struct sigmsg), 1); - /* Get the sigmsg structure into our address space. */ - if((r=data_copy(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR, + if((r=data_copy_vmcheck(who_e, (vir_bytes) m_ptr->SIG_CTXT_PTR, SYSTEM, (vir_bytes) &smsg, (phys_bytes) sizeof(struct sigmsg))) != OK) return r; @@ -54,12 +49,9 @@ message *m_ptr; /* pointer to request message */ sc.sc_flags = 0; /* unused at this time */ sc.sc_mask = smsg.sm_mask; - ph = umap_local(rp, D, (vir_bytes) scp, sizeof(struct sigcontext)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigcontext), 1); /* Copy the sigcontext structure to the user's stack. 
*/ - if((r=data_copy(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, (vir_bytes) scp, - (vir_bytes) sizeof(struct sigcontext))) != OK) + if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, + (vir_bytes) scp, (vir_bytes) sizeof(struct sigcontext))) != OK) return r; /* Initialize the sigframe structure. */ @@ -73,11 +65,9 @@ message *m_ptr; /* pointer to request message */ fr.sf_signo = smsg.sm_signo; fr.sf_retadr = (void (*)()) smsg.sm_sigreturn; - ph = umap_local(rp, D, (vir_bytes) frp, sizeof(struct sigframe)); - if(!ph) return EFAULT; - CHECKRANGE_OR_SUSPEND(rp, ph, sizeof(struct sigframe), 1); /* Copy the sigframe structure to the user's stack. */ - if((r=data_copy(SYSTEM, (vir_bytes) &fr, m_ptr->SIG_ENDPT, (vir_bytes) frp, + if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &fr, + m_ptr->SIG_ENDPT, (vir_bytes) frp, (vir_bytes) sizeof(struct sigframe))) != OK) return r; diff --git a/kernel/system/do_sysctl.c b/kernel/system/do_sysctl.c index ff35a6621..69dc4f862 100644 --- a/kernel/system/do_sysctl.c +++ b/kernel/system/do_sysctl.c @@ -16,7 +16,6 @@ PUBLIC int do_sysctl(m_ptr) register message *m_ptr; /* pointer to request message */ { - phys_bytes ph; vir_bytes len, buf; static char mybuf[DIAG_BUFSIZE]; struct proc *caller, *target; @@ -33,10 +32,7 @@ register message *m_ptr; /* pointer to request message */ caller->p_endpoint, len); return EINVAL; } - if((ph=umap_local(caller, D, buf, len)) == 0) - return EFAULT; - CHECKRANGE_OR_SUSPEND(caller, ph, len, 1); - if((s=data_copy(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) { + if((s=data_copy_vmcheck(who_e, buf, SYSTEM, (vir_bytes) mybuf, len)) != OK) { kprintf("do_sysctl: diag for %d: len %d: copy failed: %d\n", caller->p_endpoint, len, s); return s; diff --git a/kernel/system/do_umap.c b/kernel/system/do_umap.c index 7c235ba46..39d59fc1f 100644 --- a/kernel/system/do_umap.c +++ b/kernel/system/do_umap.c @@ -48,19 +48,15 @@ register message *m_ptr; /* pointer to request message */ case LOCAL_SEG: 
phys_addr = lin_addr = umap_local(targetpr, seg_index, offset, count); if(!lin_addr) return EFAULT; - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); naughty = 1; break; case REMOTE_SEG: phys_addr = lin_addr = umap_remote(targetpr, seg_index, offset, count); if(!lin_addr) return EFAULT; - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); naughty = 1; break; - case GRANT_SEG: - naughty = 1; case LOCAL_VM_SEG: - if(seg_index == MEM_GRANT || seg_type == GRANT_SEG) { + if(seg_index == MEM_GRANT) { vir_bytes newoffset; endpoint_t newep; int new_proc_nr; @@ -93,7 +89,6 @@ register message *m_ptr; /* pointer to request message */ kprintf("SYSTEM:do_umap: umap_local failed\n"); return EFAULT; } - CHECKRANGE_OR_SUSPEND(targetpr, lin_addr, count, 1); if(vm_lookup(targetpr, lin_addr, &phys_addr, NULL) != OK) { kprintf("SYSTEM:do_umap: vm_lookup failed\n"); return EFAULT; diff --git a/kernel/system/do_vm_setbuf.c b/kernel/system/do_vm_setbuf.c deleted file mode 100644 index 484c43559..000000000 --- a/kernel/system/do_vm_setbuf.c +++ /dev/null @@ -1,29 +0,0 @@ -/* The system call implemented in this file: - * m_type: SYS_VM_SETBUF - * - * The parameters for this system call are: - * m4_l1: Start of the buffer - * m4_l2: Length of the buffer - * m4_l3: End of main memory - */ -#include "../system.h" - -#define VM_DEBUG 0 /* enable/ disable debug output */ - -/*===========================================================================* - * do_vm_setbuf * - *===========================================================================*/ -PUBLIC int do_vm_setbuf(m_ptr) -message *m_ptr; /* pointer to request message */ -{ - vm_base= m_ptr->m4_l1; - vm_size= m_ptr->m4_l2; - vm_mem_high= m_ptr->m4_l3; - -#if VM_DEBUG - kprintf("do_vm_setbuf: got 0x%x @ 0x%x for 0x%x\n", - vm_size, vm_base, vm_mem_high); -#endif - - return OK; -} diff --git a/kernel/system/do_vmctl.c b/kernel/system/do_vmctl.c index f522a20f8..03ed97c29 100644 --- a/kernel/system/do_vmctl.c +++ 
b/kernel/system/do_vmctl.c @@ -21,12 +21,10 @@ register message *m_ptr; /* pointer to request message */ { int proc_nr, i; endpoint_t ep = m_ptr->SVMCTL_WHO; - struct proc *p, *rp; + struct proc *p, *rp, *target; if(ep == SELF) { ep = m_ptr->m_source; } - vm_init(); - if(!isokendpt(ep, &proc_nr)) { kprintf("do_vmctl: unexpected endpoint %d from VM\n", ep); return EINVAL; @@ -42,14 +40,35 @@ register message *m_ptr; /* pointer to request message */ /* Send VM the information about the memory request. */ if(!(rp = vmrequest)) return ESRCH; - if(!RTS_ISSET(rp, VMREQUEST)) - minix_panic("do_vmctl: no VMREQUEST set", NO_NUM); + vmassert(RTS_ISSET(rp, VMREQUEST)); + +#if 0 + printf("kernel: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ", + rp->p_name, rp->p_endpoint, rp->p_vmrequest.who, + rp->p_vmrequest.start, + rp->p_vmrequest.start + rp->p_vmrequest.length, + rp->p_vmrequest.writeflag, rp->p_vmrequest.stacktrace); + printf("type %d\n", rp->p_vmrequest.type); +#endif + +#if DEBUG_VMASSERT + okendpt(rp->p_vmrequest.who, &proc_nr); + target = proc_addr(proc_nr); +#if 0 + if(!RTS_ISSET(target, VMREQTARGET)) { + printf("set stack: %s\n", rp->p_vmrequest.stacktrace); + minix_panic("VMREQTARGET not set for target", + NO_NUM); + } +#endif +#endif /* Reply with request fields. */ m_ptr->SVMCTL_MRG_ADDR = (char *) rp->p_vmrequest.start; m_ptr->SVMCTL_MRG_LEN = rp->p_vmrequest.length; m_ptr->SVMCTL_MRG_WRITE = rp->p_vmrequest.writeflag; m_ptr->SVMCTL_MRG_EP = rp->p_vmrequest.who; + m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint; rp->p_vmrequest.vmresult = VMSUSPEND; /* Remove from request chain. 
*/ @@ -57,46 +76,61 @@ register message *m_ptr; /* pointer to request message */ return OK; case VMCTL_MEMREQ_REPLY: - if(!(rp = p->p_vmrequest.requestor)) - minix_panic("do_vmctl: no requestor set", ep); - p->p_vmrequest.requestor = NULL; - if(!RTS_ISSET(rp, VMREQUEST)) - minix_panic("do_vmctl: no VMREQUEST set", ep); - if(rp->p_vmrequest.vmresult != VMSUSPEND) - minix_panic("do_vmctl: result not VMSUSPEND set", - rp->p_vmrequest.vmresult); - rp->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; - if(rp->p_vmrequest.vmresult == VMSUSPEND) - minix_panic("VM returned VMSUSPEND?", NO_NUM); - if(rp->p_vmrequest.vmresult != OK) + vmassert(RTS_ISSET(p, VMREQUEST)); + vmassert(p->p_vmrequest.vmresult == VMSUSPEND); + okendpt(p->p_vmrequest.who, &proc_nr); + target = proc_addr(proc_nr); + p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; + vmassert(p->p_vmrequest.vmresult != VMSUSPEND); + if(p->p_vmrequest.vmresult != OK) kprintf("SYSTEM: VM replied %d to mem request\n", - rp->p_vmrequest.vmresult); + p->p_vmrequest.vmresult); - /* Put on restart chain. */ - rp->p_vmrequest.nextrestart = vmrestart; - vmrestart = rp; +#if 0 + printf("memreq reply: vm request sent by: %s / %d about %d; 0x%lx-0x%lx, wr %d, stack: %s ", + p->p_name, p->p_endpoint, p->p_vmrequest.who, + p->p_vmrequest.start, + p->p_vmrequest.start + p->p_vmrequest.length, + p->p_vmrequest.writeflag, p->p_vmrequest.stacktrace); + printf("type %d\n", p->p_vmrequest.type); + + vmassert(RTS_ISSET(target, VMREQTARGET)); + RTS_LOCK_UNSET(target, VMREQTARGET); +#endif + + if(p->p_vmrequest.type == VMSTYPE_KERNELCALL) { + /* Put on restart chain. */ + p->p_vmrequest.nextrestart = vmrestart; + vmrestart = p; + } else if(p->p_vmrequest.type == VMSTYPE_DELIVERMSG) { + vmassert(p->p_misc_flags & MF_DELIVERMSG); + vmassert(p == target); + vmassert(RTS_ISSET(p, VMREQUEST)); + RTS_LOCK_UNSET(p, VMREQUEST); + } else { #if DEBUG_VMASSERT - /* Sanity check. 
*/ - if(rp->p_vmrequest.vmresult == OK) { - if(CHECKRANGE(p, - rp->p_vmrequest.start, - rp->p_vmrequest.length, - rp->p_vmrequest.writeflag) != OK) { -kprintf("SYSTEM: request %d:0x%lx-0x%lx, wrflag %d, failed\n", - rp->p_endpoint, - rp->p_vmrequest.start, rp->p_vmrequest.start + rp->p_vmrequest.length, - rp->p_vmrequest.writeflag); - - minix_panic("SYSTEM: fail but VM said OK", NO_NUM); - } + printf("suspended with stack: %s\n", + p->p_vmrequest.stacktrace); +#endif + minix_panic("strange request type", + p->p_vmrequest.type); } -#endif + return OK; -#if VM_KERN_NOPAGEZERO - case VMCTL_NOPAGEZERO: + case VMCTL_ENABLE_PAGING: + if(vm_running) + minix_panic("do_vmctl: paging already enabled", NO_NUM); + vm_init(p); + if(!vm_running) + minix_panic("do_vmctl: paging enabling failed", NO_NUM); + vmassert(p->p_delivermsg_lin == + umap_local(p, D, p->p_delivermsg_vir, sizeof(message))); + if(newmap(p, (struct mem_map *) m_ptr->SVMCTL_VALUE) != OK) + minix_panic("do_vmctl: newmap failed", NO_NUM); + FIXLINMSG(p); + vmassert(p->p_delivermsg_lin); return OK; -#endif } /* Try architecture-specific vmctls. */ diff --git a/kernel/system/do_vtimer.c b/kernel/system/do_vtimer.c index 8904ab60d..50a957ab7 100644 --- a/kernel/system/do_vtimer.c +++ b/kernel/system/do_vtimer.c @@ -46,10 +46,10 @@ message *m_ptr; /* pointer to request message */ * VT_VIRTUAL and VT_PROF multiple times below. */ if (m_ptr->VT_WHICH == VT_VIRTUAL) { - pt_flag = VIRT_TIMER; + pt_flag = MF_VIRT_TIMER; pt_left = &rp->p_virt_left; } else { /* VT_PROF */ - pt_flag = PROF_TIMER; + pt_flag = MF_PROF_TIMER; pt_left = &rp->p_prof_left; } @@ -101,15 +101,15 @@ struct proc *rp; /* pointer to the process */ */ /* Check if the virtual timer expired. If so, send a SIGVTALRM signal. 
*/ - if ((rp->p_misc_flags & VIRT_TIMER) && rp->p_virt_left <= 0) { - rp->p_misc_flags &= ~VIRT_TIMER; + if ((rp->p_misc_flags & MF_VIRT_TIMER) && rp->p_virt_left <= 0) { + rp->p_misc_flags &= ~MF_VIRT_TIMER; rp->p_virt_left = 0; cause_sig(rp->p_nr, SIGVTALRM); } /* Check if the profile timer expired. If so, send a SIGPROF signal. */ - if ((rp->p_misc_flags & PROF_TIMER) && rp->p_prof_left <= 0) { - rp->p_misc_flags &= ~PROF_TIMER; + if ((rp->p_misc_flags & MF_PROF_TIMER) && rp->p_prof_left <= 0) { + rp->p_misc_flags &= ~MF_PROF_TIMER; rp->p_prof_left = 0; cause_sig(rp->p_nr, SIGPROF); } diff --git a/kernel/table.c b/kernel/table.c index 01a0c0edb..960801981 100755 --- a/kernel/table.c +++ b/kernel/table.c @@ -35,7 +35,7 @@ /* Define stack sizes for the kernel tasks included in the system image. */ #define NO_STACK 0 -#define SMALL_STACK (256 * sizeof(char *)) +#define SMALL_STACK (1024 * sizeof(char *)) #define IDL_S SMALL_STACK /* 3 intr, 3 temps, 4 db for Intel */ #define HRD_S NO_STACK /* dummy task, uses kernel stack */ #define TSK_S SMALL_STACK /* system and clock task */ @@ -48,6 +48,7 @@ PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)]; #define IDL_F (SYS_PROC | PREEMPTIBLE | BILLABLE) /* idle task */ #define TSK_F (SYS_PROC) /* kernel tasks */ #define SRV_F (SYS_PROC | PREEMPTIBLE) /* system services */ +#define VM_F (SYS_PROC) /* vm */ #define USR_F (BILLABLE | PREEMPTIBLE | PROC_FULLVM) /* user processes */ #define SVM_F (SRV_F | PROC_FULLVM) /* servers with VM */ @@ -91,6 +92,7 @@ PRIVATE int ds_c[] = { SYS_ALL_CALLS }, vm_c[] = { SYS_ALL_CALLS }, drv_c[] = { DRV_C }, + usr_c[] = { SYS_SYSCTL }, tty_c[] = { DRV_C, SYS_PHYSCOPY, SYS_ABORT, SYS_IOPENABLE, SYS_READBIOS }, mem_c[] = { DRV_C, SYS_PHYSCOPY, SYS_PHYSVCOPY, SYS_IOPENABLE }; @@ -115,16 +117,16 @@ PUBLIC struct boot_image image[] = { {CLOCK,clock_task,TSK_F, 8, TASK_Q, TSK_S, TSK_T, 0, no_c,"clock" }, {SYSTEM, sys_task,TSK_F, 8, TASK_Q, TSK_S, TSK_T, 0, no_c,"system"}, {HARDWARE, 0,TSK_F, 
8, TASK_Q, HRD_S, 0, 0, no_c,"kernel"}, -{PM_PROC_NR, 0,SVM_F, 32, 4, 0, SRV_T, SRV_M, c(pm_c),"pm" }, -{FS_PROC_NR, 0,SVM_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"vfs" }, +{PM_PROC_NR, 0,SRV_F, 32, 4, 0, SRV_T, SRV_M, c(pm_c),"pm" }, +{FS_PROC_NR, 0,SRV_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"vfs" }, {RS_PROC_NR, 0,SVM_F, 4, 4, 0, SRV_T, SYS_M, c(rs_c),"rs" }, {MEM_PROC_NR, 0,SVM_F, 4, 3, 0, SRV_T, SYS_M,c(mem_c),"memory"}, -{LOG_PROC_NR, 0,SVM_F, 4, 2, 0, SRV_T, SYS_M,c(drv_c),"log" }, +{LOG_PROC_NR, 0,SRV_F, 4, 2, 0, SRV_T, SYS_M,c(drv_c),"log" }, {TTY_PROC_NR, 0,SVM_F, 4, 1, 0, SRV_T, SYS_M,c(tty_c),"tty" }, {DS_PROC_NR, 0,SVM_F, 4, 4, 0, SRV_T, SYS_M, c(ds_c),"ds" }, {MFS_PROC_NR, 0,SVM_F, 32, 5, 0, SRV_T, SRV_M, c(fs_c),"mfs" }, -{VM_PROC_NR, 0,SRV_F, 32, 2, 0, SRV_T, SRV_M, c(vm_c),"vm" }, -{INIT_PROC_NR, 0,USR_F, 8, USER_Q, 0, USR_T, USR_M, no_c,"init" }, +{VM_PROC_NR, 0,VM_F, 32, 2, 0, SRV_T, SRV_M, c(vm_c),"vm" }, +{INIT_PROC_NR, 0,USR_F, 8, USER_Q, 0, USR_T, USR_M, c(usr_c),"init" }, }; /* Verify the size of the system image table at compile time. Also verify that @@ -137,5 +139,3 @@ PUBLIC struct boot_image image[] = { extern int dummy[(NR_BOOT_PROCS==sizeof(image)/ sizeof(struct boot_image))?1:-1]; extern int dummy[(BITCHUNK_BITS > NR_BOOT_PROCS - 1) ? 1 : -1]; - -PUBLIC endpoint_t ipc_stats_target= NONE; diff --git a/kernel/vm.h b/kernel/vm.h index 9e0b615da..6c3473f2b 100644 --- a/kernel/vm.h +++ b/kernel/vm.h @@ -2,18 +2,19 @@ #ifndef _VM_H #define _VM_H 1 -#define CHECKRANGE_OR_SUSPEND(pr, start, length, wr) { int mr; \ - if(vm_running && (mr=vm_checkrange(proc_addr(who_p), pr, start, length, wr, 0)) != OK) { \ - return mr; \ - } } - -#define CHECKRANGE(pr, start, length, wr) \ - vm_checkrange(proc_addr(who_p), pr, start, length, wr, 1) - -/* Pseudo error code indicating a process request has to be - * restarted after an OK from VM. 
- */ +/* Pseudo error codes */ #define VMSUSPEND -996 +#define EFAULT_SRC -995 +#define EFAULT_DST -994 + +#define FIXLINMSG(prp) { prp->p_delivermsg_lin = umap_local(prp, D, prp->p_delivermsg_vir, sizeof(message)); } + +#define PHYS_COPY_CATCH(src, dst, size, a) { \ + vmassert(intr_disabled()); \ + catch_pagefaults++; \ + a = phys_copy(src, dst, size); \ + catch_pagefaults--; \ + } #endif