Changed pagefault delivery to VM
This patch changes the way pagefaults are delivered to VM. It adopts the same model as the out-of-quantum messages sent by the kernel to a scheduler.
- Every time a userspace pagefault occurs, the kernel creates a message which is sent to VM on behalf of the faulting process.
- The process is blocked on delivery to VM in the standard IPC code instead of waiting in a special in-kernel queue (stack), and is not runnable until VM tells the kernel that the pagefault is resolved and the kernel is free to clear the RTS_PAGEFAULT flag.
- VM does not need to call the kernel and poll for the pagefault information, which saves many (1/2?) calls, as well as the kernel calls that only return "no more data".
- Notification of VM by the kernel no longer needs to use signals.
- Each entry in the proc table is 12 bytes smaller (~3k saved).
This commit is contained in:
parent
a131085a5b
commit
f51eea4b32
13 changed files with 104 additions and 178 deletions
|
@ -28,16 +28,6 @@ typedef struct segframe {
|
|||
struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */
|
||||
} segframe_t;
|
||||
|
||||
/* Page fault event. Stored in process table. Only valid if PAGEFAULT
|
||||
* set in p_rts_flags.
|
||||
*/
|
||||
struct pagefault
|
||||
{
|
||||
u32_t pf_virtual; /* Address causing fault (CR2). */
|
||||
u32_t pf_flags; /* Pagefault flags on stack. */
|
||||
};
|
||||
|
||||
|
||||
/* fpu_state_s is used in kernel proc table.
|
||||
* Any changes in this structure requires changes in sconst.h,
|
||||
* since this structure is used in proc structure. */
|
||||
|
|
|
@ -614,7 +614,6 @@
|
|||
|
||||
/* Values for SVMCTL_PARAM. */
|
||||
#define VMCTL_I386_SETCR3 10
|
||||
#define VMCTL_GET_PAGEFAULT 11
|
||||
#define VMCTL_CLEAR_PAGEFAULT 12
|
||||
#define VMCTL_I386_GETCR3 13
|
||||
#define VMCTL_MEMREQ_GET 14
|
||||
|
@ -1018,6 +1017,11 @@
|
|||
#define NR_VM_CALLS 42
|
||||
#define VM_CALL_MASK_SIZE BITMAP_CHUNKS(NR_VM_CALLS)
|
||||
|
||||
/* not handled as a normal VM call, thus at the end of the reserved range */
|
||||
#define VM_PAGEFAULT (VM_RQ_BASE+0xff)
|
||||
# define VPF_ADDR m1_i1
|
||||
# define VPF_FLAGS m1_i2
|
||||
|
||||
/* Basic vm calls allowed to every process. */
|
||||
#define VM_BASIC_CALLS \
|
||||
VM_MMAP, VM_MUNMAP, VM_MUNMAP_TEXT, VM_MAP_PHYS, VM_UNMAP_PHYS
|
||||
|
|
|
@ -75,13 +75,12 @@ typedef unsigned long sigset_t;
|
|||
#define IS_SIGS(signo) (signo>=SIGS_FIRST && signo<=SIGS_LAST)
|
||||
|
||||
/* Signals delivered by the kernel. */
|
||||
#define SIGKPF 27 /* kernel page fault request pending */
|
||||
#define SIGKMEM 28 /* kernel memory request pending */
|
||||
#define SIGKMESS 29 /* new kernel message */
|
||||
#define SIGKSIGSM 30 /* kernel signal pending for signal manager */
|
||||
#define SIGKSIG 31 /* kernel signal pending */
|
||||
#define SIGKMEM 27 /* kernel memory request pending */
|
||||
#define SIGKMESS 28 /* new kernel message */
|
||||
#define SIGKSIGSM 29 /* kernel signal pending for signal manager */
|
||||
#define SIGKSIG 30 /* kernel signal pending */
|
||||
|
||||
#define SIGK_FIRST SIGKPF /* first kernel signal */
|
||||
#define SIGK_FIRST SIGKMEM /* first kernel signal */
|
||||
#define SIGK_LAST SIGKSIG /* last kernel signal */
|
||||
#define IS_SIGK(signo) (signo>=SIGK_FIRST && signo<=SIGK_LAST)
|
||||
|
||||
|
|
|
@ -41,19 +41,6 @@ struct proc *p;
|
|||
/* Increase process SP. */
|
||||
p->p_reg.sp += m_ptr->SVMCTL_VALUE;
|
||||
return OK;
|
||||
case VMCTL_GET_PAGEFAULT:
|
||||
{
|
||||
struct proc *rp;
|
||||
if(!(rp=pagefaults))
|
||||
return ESRCH;
|
||||
pagefaults = rp->p_nextpagefault;
|
||||
if(!RTS_ISSET(rp, RTS_PAGEFAULT))
|
||||
panic( "non-PAGEFAULT process on pagefault chain: %d", rp->p_endpoint);
|
||||
m_ptr->SVMCTL_PF_WHO = rp->p_endpoint;
|
||||
m_ptr->SVMCTL_PF_I386_CR2 = rp->p_pagefault.pf_virtual;
|
||||
m_ptr->SVMCTL_PF_I386_ERR = rp->p_pagefault.pf_flags;
|
||||
return OK;
|
||||
}
|
||||
case VMCTL_I386_KERNELLIMIT:
|
||||
{
|
||||
int r;
|
||||
|
|
|
@ -20,6 +20,8 @@ void pagefault( struct proc *pr,
|
|||
int in_physcopy = 0;
|
||||
|
||||
reg_t pagefaultcr2;
|
||||
message m_pagefault;
|
||||
int err;
|
||||
|
||||
assert(frame);
|
||||
|
||||
|
@ -77,17 +79,16 @@ void pagefault( struct proc *pr,
|
|||
assert(!RTS_ISSET(pr, RTS_PAGEFAULT));
|
||||
RTS_SET(pr, RTS_PAGEFAULT);
|
||||
|
||||
/* Save pagefault details, suspend process,
|
||||
* add process to pagefault chain,
|
||||
* and tell VM there is a pagefault to be
|
||||
* handled.
|
||||
*/
|
||||
pr->p_pagefault.pf_virtual = pagefaultcr2;
|
||||
pr->p_pagefault.pf_flags = frame->errcode;
|
||||
pr->p_nextpagefault = pagefaults;
|
||||
pagefaults = pr;
|
||||
|
||||
send_sig(VM_PROC_NR, SIGKPF);
|
||||
/* tell VM about the pagefault */
|
||||
m_pagefault.m_source = pr->p_endpoint;
|
||||
m_pagefault.m_type = VM_PAGEFAULT;
|
||||
m_pagefault.VPF_ADDR = pagefaultcr2;
|
||||
m_pagefault.VPF_FLAGS = frame->errcode;
|
||||
|
||||
if ((err = mini_send(pr, VM_PROC_NR,
|
||||
&m_pagefault, FROM_KERNEL))) {
|
||||
panic("WARNING: pagefault: mini_send returned %d\n", err);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -28,7 +28,6 @@ EXTERN struct loadinfo kloadinfo; /* status of load average */
|
|||
EXTERN struct proc *proc_ptr; /* pointer to currently running process */
|
||||
EXTERN struct proc *bill_ptr; /* process to bill for clock ticks */
|
||||
EXTERN struct proc *vmrequest; /* first process on vmrequest queue */
|
||||
EXTERN struct proc *pagefaults; /* first process on pagefault queue */
|
||||
EXTERN unsigned lost_ticks; /* clock ticks counted outside clock task */
|
||||
|
||||
|
||||
|
|
|
@ -31,8 +31,6 @@ struct proc {
|
|||
struct proc *p_scheduler; /* who should get out of quantum msg */
|
||||
|
||||
struct mem_map p_memmap[NR_LOCAL_SEGS]; /* memory map (T, D, S) */
|
||||
struct pagefault p_pagefault; /* valid if PAGEFAULT in p_rts_flags set */
|
||||
struct proc *p_nextpagefault; /* next on PAGEFAULT chain */
|
||||
|
||||
clock_t p_user_time; /* user time in ticks */
|
||||
clock_t p_sys_time; /* sys time in ticks */
|
||||
|
|
|
@ -12,22 +12,6 @@ PUBLIC int sys_vmctl(endpoint_t who, int param, u32_t value)
|
|||
return(r);
|
||||
}
|
||||
|
||||
PUBLIC int sys_vmctl_get_pagefault_i386(endpoint_t *who, u32_t *cr2, u32_t *err)
|
||||
{
|
||||
message m;
|
||||
int r;
|
||||
|
||||
m.SVMCTL_WHO = SELF;
|
||||
m.SVMCTL_PARAM = VMCTL_GET_PAGEFAULT;
|
||||
r = _kernel_call(SYS_VMCTL, &m);
|
||||
if(r == OK) {
|
||||
*who = m.SVMCTL_PF_WHO;
|
||||
*cr2 = m.SVMCTL_PF_I386_CR2;
|
||||
*err = m.SVMCTL_PF_I386_ERR;
|
||||
}
|
||||
return(r);
|
||||
}
|
||||
|
||||
PUBLIC int sys_vmctl_get_cr3_i386(endpoint_t who, u32_t *cr3)
|
||||
{
|
||||
message m;
|
||||
|
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
#Arch-specific sources
|
||||
.PATH: ${.CURDIR}/arch/${ARCH}
|
||||
SRCS+= vm.c pagetable.c arch_pagefaults.c #util.S
|
||||
SRCS+= vm.c pagetable.c #util.S
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
|
||||
#define _SYSTEM 1
|
||||
|
||||
#include <minix/callnr.h>
|
||||
#include <minix/com.h>
|
||||
#include <minix/config.h>
|
||||
#include <minix/const.h>
|
||||
#include <minix/ds.h>
|
||||
#include <minix/endpoint.h>
|
||||
#include <minix/keymap.h>
|
||||
#include <minix/minlib.h>
|
||||
#include <minix/type.h>
|
||||
#include <minix/ipc.h>
|
||||
#include <minix/sysutil.h>
|
||||
#include <minix/syslib.h>
|
||||
#include <minix/safecopies.h>
|
||||
#include <minix/bitmap.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <env.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "glo.h"
|
||||
#include "proto.h"
|
||||
#include "util.h"
|
||||
|
||||
/*===========================================================================*
|
||||
* arch_handle_pagefaults *
|
||||
*===========================================================================*/
|
||||
PUBLIC int arch_get_pagefault(who, addr, err)
|
||||
endpoint_t *who;
|
||||
vir_bytes *addr;
|
||||
u32_t *err;
|
||||
{
|
||||
return sys_vmctl_get_pagefault_i386(who, addr, err);
|
||||
}
|
||||
|
|
@ -74,7 +74,7 @@ FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
|
|||
PUBLIC int main(void)
|
||||
{
|
||||
message msg;
|
||||
int result, who_e;
|
||||
int result, who_e, rcv_sts;
|
||||
sigset_t sigset;
|
||||
|
||||
/* SEF local startup. */
|
||||
|
@ -92,12 +92,12 @@ PUBLIC int main(void)
|
|||
}
|
||||
SANITYCHECK(SCL_DETAIL);
|
||||
|
||||
if ((r=sef_receive(ANY, &msg)) != OK)
|
||||
panic("sef_receive() error: %d", r);
|
||||
if ((r=sef_receive_status(ANY, &msg, &rcv_sts)) != OK)
|
||||
panic("sef_receive_status() error: %d", r);
|
||||
|
||||
SANITYCHECK(SCL_DETAIL);
|
||||
|
||||
if(msg.m_type & NOTIFY_MESSAGE) {
|
||||
if (is_ipc_notify(rcv_sts)) {
|
||||
/* Unexpected notify(). */
|
||||
printf("VM: ignoring notify() from %d\n", msg.m_source);
|
||||
continue;
|
||||
|
@ -105,16 +105,30 @@ PUBLIC int main(void)
|
|||
who_e = msg.m_source;
|
||||
c = CALLNUMBER(msg.m_type);
|
||||
result = ENOSYS; /* Out of range or restricted calls return this. */
|
||||
if(c < 0 || !vm_calls[c].vmc_func) {
|
||||
if (msg.m_type == VM_PAGEFAULT) {
|
||||
if (!IPC_STATUS_FLAGS_TEST(rcv_sts, IPC_FLG_MSG_FROM_KERNEL)) {
|
||||
printf("VM: process %d faked VM_PAGEFAULT "
|
||||
"message!\n", msg.m_source);
|
||||
}
|
||||
do_pagefaults(&msg);
|
||||
/*
|
||||
* do not reply to this call, the caller is unblocked by
|
||||
* a sys_vmctl() call in do_pagefaults if success. VM panics
|
||||
* otherwise
|
||||
*/
|
||||
continue;
|
||||
} else if(c < 0 || !vm_calls[c].vmc_func) {
|
||||
printf("VM: out of range or missing callnr %d from %d\n",
|
||||
msg.m_type, who_e);
|
||||
} else if (vm_acl_ok(who_e, c) != OK) {
|
||||
printf("VM: unauthorized %s by %d\n",
|
||||
vm_calls[c].vmc_name, who_e);
|
||||
} else {
|
||||
SANITYCHECK(SCL_FUNCTIONS);
|
||||
result = vm_calls[c].vmc_func(&msg);
|
||||
SANITYCHECK(SCL_FUNCTIONS);
|
||||
if (vm_acl_ok(who_e, c) != OK) {
|
||||
printf("VM: unauthorized %s by %d\n",
|
||||
vm_calls[c].vmc_name, who_e);
|
||||
} else {
|
||||
SANITYCHECK(SCL_FUNCTIONS);
|
||||
result = vm_calls[c].vmc_func(&msg);
|
||||
SANITYCHECK(SCL_FUNCTIONS);
|
||||
}
|
||||
}
|
||||
|
||||
/* Send reply message, unless the return code is SUSPEND,
|
||||
|
@ -381,10 +395,6 @@ PRIVATE void sef_cb_signal_handler(int signo)
|
|||
case SIGKMEM:
|
||||
do_memory();
|
||||
break;
|
||||
/* There is a pending page fault request from the kernel. */
|
||||
case SIGKPF:
|
||||
do_pagefaults();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -51,77 +51,73 @@ char *pf_errstr(u32_t err)
|
|||
/*===========================================================================*
|
||||
* do_pagefaults *
|
||||
*===========================================================================*/
|
||||
PUBLIC void do_pagefaults(void)
|
||||
PUBLIC void do_pagefaults(message *m)
|
||||
{
|
||||
endpoint_t ep;
|
||||
u32_t addr, err;
|
||||
endpoint_t ep = m->m_source;
|
||||
u32_t addr = m->VPF_ADDR;
|
||||
u32_t err = m->VPF_FLAGS;
|
||||
struct vmproc *vmp;
|
||||
int r, s;
|
||||
|
||||
while((r=arch_get_pagefault(&ep, &addr, &err)) == OK) {
|
||||
struct vir_region *region;
|
||||
vir_bytes offset;
|
||||
int p, wr = PFERR_WRITE(err);
|
||||
struct vir_region *region;
|
||||
vir_bytes offset;
|
||||
int p, wr = PFERR_WRITE(err);
|
||||
|
||||
if(vm_isokendpt(ep, &p) != OK)
|
||||
panic("do_pagefaults: endpoint wrong: %d", ep);
|
||||
if(vm_isokendpt(ep, &p) != OK)
|
||||
panic("do_pagefaults: endpoint wrong: %d", ep);
|
||||
|
||||
vmp = &vmproc[p];
|
||||
assert(vmp->vm_flags & VMF_INUSE);
|
||||
vmp = &vmproc[p];
|
||||
assert(vmp->vm_flags & VMF_INUSE);
|
||||
|
||||
/* See if address is valid at all. */
|
||||
if(!(region = map_lookup(vmp, addr))) {
|
||||
assert(PFERR_NOPAGE(err));
|
||||
printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx %s\n",
|
||||
/* See if address is valid at all. */
|
||||
if(!(region = map_lookup(vmp, addr))) {
|
||||
assert(PFERR_NOPAGE(err));
|
||||
printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx %s\n",
|
||||
ep, arch_map2vir(vmp, addr), pf_errstr(err));
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Make sure this isn't a region that isn't supposed
|
||||
* to cause pagefaults.
|
||||
*/
|
||||
assert(!(region->flags & VR_NOPF));
|
||||
|
||||
/* We do not allow shared memory to cause pagefaults.
|
||||
* These pages have to be pre-allocated.
|
||||
*/
|
||||
assert(!(region->flags & VR_SHARED));
|
||||
|
||||
/* If process was writing, see if it's writable. */
|
||||
if(!(region->flags & VR_WRITABLE) && wr) {
|
||||
printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
|
||||
ep, arch_map2vir(vmp, addr), pf_errstr(err));
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(addr >= region->vaddr);
|
||||
offset = addr - region->vaddr;
|
||||
|
||||
/* Access is allowed; handle it. */
|
||||
if((r=map_pf(vmp, region, offset, wr)) != OK) {
|
||||
printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Pagefault is handled, so now reactivate the process. */
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
/* Make sure this isn't a region that isn't supposed
|
||||
* to cause pagefaults.
|
||||
*/
|
||||
assert(!(region->flags & VR_NOPF));
|
||||
|
||||
/* We do not allow shared memory to cause pagefaults.
|
||||
* These pages have to be pre-allocated.
|
||||
*/
|
||||
assert(!(region->flags & VR_SHARED));
|
||||
|
||||
/* If process was writing, see if it's writable. */
|
||||
if(!(region->flags & VR_WRITABLE) && wr) {
|
||||
printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
|
||||
ep, arch_map2vir(vmp, addr), pf_errstr(err));
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(addr >= region->vaddr);
|
||||
offset = addr - region->vaddr;
|
||||
|
||||
/* Access is allowed; handle it. */
|
||||
if((r=map_pf(vmp, region, offset, wr)) != OK) {
|
||||
printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
|
||||
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
|
||||
panic("sys_kill failed: %d", s);
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Pagefault is handled, so now reactivate the process. */
|
||||
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
|
||||
panic("do_pagefaults: sys_vmctl failed: %d", ep);
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
|
|
|
@ -90,7 +90,7 @@ _PROTOTYPE(int do_shared_unmap, (message *m) );
|
|||
_PROTOTYPE(int do_get_refcount, (message *m) );
|
||||
|
||||
/* pagefaults.c */
|
||||
_PROTOTYPE( void do_pagefaults, (void) );
|
||||
_PROTOTYPE( void do_pagefaults, (message *m) );
|
||||
_PROTOTYPE( void do_memory, (void) );
|
||||
_PROTOTYPE( char *pf_errstr, (u32_t err));
|
||||
_PROTOTYPE( int handle_memory, (struct vmproc *vmp, vir_bytes mem,
|
||||
|
@ -115,9 +115,6 @@ _PROTOTYPE( int vm_addrok, (void *vir, int write) );
|
|||
_PROTOTYPE( void pt_sanitycheck, (pt_t *pt, char *file, int line) );
|
||||
#endif
|
||||
|
||||
/* $(ARCH)/pagefaults.c */
|
||||
_PROTOTYPE( int arch_get_pagefault, (endpoint_t *who, vir_bytes *addr, u32_t *err));
|
||||
|
||||
/* slaballoc.c */
|
||||
_PROTOTYPE(void *slaballoc,(int bytes));
|
||||
_PROTOTYPE(void slabfree,(void *mem, int bytes));
|
||||
|
|
Loading…
Reference in a new issue