Changed pagefault delivery to VM

This patch changes the way pagefaults are delivered to VM. It adopts
the same model as the out-of-quantum messages sent by the kernel to a
scheduler.

- every time a userspace pagefault occurs, the kernel creates a message
  which is sent to VM on behalf of the faulting process

- the process is blocked on delivery to VM in the standard IPC code
  instead of waiting in a special in-kernel queue (stack) and is not
  runnable until VM tells the kernel that the pagefault is resolved and
  that the kernel is free to clear the RTS_PAGEFAULT flag.

- VM does not need to call the kernel and poll for the pagefault
  information, which saves many (1/2?) calls as well as the kernel calls
  that return "no more data"

- VM notification by kernel does not need to use signals

- each entry in the proc table is 12 bytes smaller (~3k saved)
This commit is contained in:
Tomas Hruby 2010-04-26 23:21:26 +00:00
parent a131085a5b
commit f51eea4b32
13 changed files with 104 additions and 178 deletions

View file

@ -28,16 +28,6 @@ typedef struct segframe {
struct segdesc_s p_ldt[LDT_SIZE]; /* CS, DS and remote */
} segframe_t;
/* Page fault event. Stored in process table. Only valid if PAGEFAULT
* set in p_rts_flags.
*/
struct pagefault
{
u32_t pf_virtual; /* Address causing fault (CR2). */
u32_t pf_flags; /* Pagefault flags on stack. */
};
/* fpu_state_s is used in kernel proc table.
* Any changes in this structure requires changes in sconst.h,
* since this structure is used in proc structure. */

View file

@ -614,7 +614,6 @@
/* Values for SVMCTL_PARAM. */
#define VMCTL_I386_SETCR3 10
#define VMCTL_GET_PAGEFAULT 11
#define VMCTL_CLEAR_PAGEFAULT 12
#define VMCTL_I386_GETCR3 13
#define VMCTL_MEMREQ_GET 14
@ -1018,6 +1017,11 @@
#define NR_VM_CALLS 42
#define VM_CALL_MASK_SIZE BITMAP_CHUNKS(NR_VM_CALLS)
/* not handled as a normal VM call, thus at the end of the reserved range */
#define VM_PAGEFAULT (VM_RQ_BASE+0xff)
# define VPF_ADDR m1_i1
# define VPF_FLAGS m1_i2
/* Basic vm calls allowed to every process. */
#define VM_BASIC_CALLS \
VM_MMAP, VM_MUNMAP, VM_MUNMAP_TEXT, VM_MAP_PHYS, VM_UNMAP_PHYS

View file

@ -75,13 +75,12 @@ typedef unsigned long sigset_t;
#define IS_SIGS(signo) (signo>=SIGS_FIRST && signo<=SIGS_LAST)
/* Signals delivered by the kernel. */
#define SIGKPF 27 /* kernel page fault request pending */
#define SIGKMEM 28 /* kernel memory request pending */
#define SIGKMESS 29 /* new kernel message */
#define SIGKSIGSM 30 /* kernel signal pending for signal manager */
#define SIGKSIG 31 /* kernel signal pending */
#define SIGKMEM 27 /* kernel memory request pending */
#define SIGKMESS 28 /* new kernel message */
#define SIGKSIGSM 29 /* kernel signal pending for signal manager */
#define SIGKSIG 30 /* kernel signal pending */
#define SIGK_FIRST SIGKPF /* first kernel signal */
#define SIGK_FIRST SIGKMEM /* first kernel signal */
#define SIGK_LAST SIGKSIG /* last kernel signal */
#define IS_SIGK(signo) (signo>=SIGK_FIRST && signo<=SIGK_LAST)

View file

@ -41,19 +41,6 @@ struct proc *p;
/* Increase process SP. */
p->p_reg.sp += m_ptr->SVMCTL_VALUE;
return OK;
case VMCTL_GET_PAGEFAULT:
{
struct proc *rp;
if(!(rp=pagefaults))
return ESRCH;
pagefaults = rp->p_nextpagefault;
if(!RTS_ISSET(rp, RTS_PAGEFAULT))
panic( "non-PAGEFAULT process on pagefault chain: %d", rp->p_endpoint);
m_ptr->SVMCTL_PF_WHO = rp->p_endpoint;
m_ptr->SVMCTL_PF_I386_CR2 = rp->p_pagefault.pf_virtual;
m_ptr->SVMCTL_PF_I386_ERR = rp->p_pagefault.pf_flags;
return OK;
}
case VMCTL_I386_KERNELLIMIT:
{
int r;

View file

@ -20,6 +20,8 @@ void pagefault( struct proc *pr,
int in_physcopy = 0;
reg_t pagefaultcr2;
message m_pagefault;
int err;
assert(frame);
@ -77,17 +79,16 @@ void pagefault( struct proc *pr,
assert(!RTS_ISSET(pr, RTS_PAGEFAULT));
RTS_SET(pr, RTS_PAGEFAULT);
/* Save pagefault details, suspend process,
* add process to pagefault chain,
* and tell VM there is a pagefault to be
* handled.
*/
pr->p_pagefault.pf_virtual = pagefaultcr2;
pr->p_pagefault.pf_flags = frame->errcode;
pr->p_nextpagefault = pagefaults;
pagefaults = pr;
send_sig(VM_PROC_NR, SIGKPF);
/* tell Vm about the pagefault */
m_pagefault.m_source = pr->p_endpoint;
m_pagefault.m_type = VM_PAGEFAULT;
m_pagefault.VPF_ADDR = pagefaultcr2;
m_pagefault.VPF_FLAGS = frame->errcode;
if ((err = mini_send(pr, VM_PROC_NR,
&m_pagefault, FROM_KERNEL))) {
panic("WARNING: pagefault: mini_send returned %d\n", err);
}
return;
}

View file

@ -28,7 +28,6 @@ EXTERN struct loadinfo kloadinfo; /* status of load average */
EXTERN struct proc *proc_ptr; /* pointer to currently running process */
EXTERN struct proc *bill_ptr; /* process to bill for clock ticks */
EXTERN struct proc *vmrequest; /* first process on vmrequest queue */
EXTERN struct proc *pagefaults; /* first process on pagefault queue */
EXTERN unsigned lost_ticks; /* clock ticks counted outside clock task */

View file

@ -31,8 +31,6 @@ struct proc {
struct proc *p_scheduler; /* who should get out of quantum msg */
struct mem_map p_memmap[NR_LOCAL_SEGS]; /* memory map (T, D, S) */
struct pagefault p_pagefault; /* valid if PAGEFAULT in p_rts_flags set */
struct proc *p_nextpagefault; /* next on PAGEFAULT chain */
clock_t p_user_time; /* user time in ticks */
clock_t p_sys_time; /* sys time in ticks */

View file

@ -12,22 +12,6 @@ PUBLIC int sys_vmctl(endpoint_t who, int param, u32_t value)
return(r);
}
PUBLIC int sys_vmctl_get_pagefault_i386(endpoint_t *who, u32_t *cr2, u32_t *err)
{
message m;
int r;
m.SVMCTL_WHO = SELF;
m.SVMCTL_PARAM = VMCTL_GET_PAGEFAULT;
r = _kernel_call(SYS_VMCTL, &m);
if(r == OK) {
*who = m.SVMCTL_PF_WHO;
*cr2 = m.SVMCTL_PF_I386_CR2;
*err = m.SVMCTL_PF_I386_ERR;
}
return(r);
}
PUBLIC int sys_vmctl_get_cr3_i386(endpoint_t who, u32_t *cr3)
{
message m;

View file

@ -2,4 +2,4 @@
#Arch-specific sources
.PATH: ${.CURDIR}/arch/${ARCH}
SRCS+= vm.c pagetable.c arch_pagefaults.c #util.S
SRCS+= vm.c pagetable.c #util.S

View file

@ -1,39 +0,0 @@
#define _SYSTEM 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/keymap.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/safecopies.h>
#include <minix/bitmap.h>
#include <errno.h>
#include <string.h>
#include <env.h>
#include <stdio.h>
#include <fcntl.h>
#include "glo.h"
#include "proto.h"
#include "util.h"
/*===========================================================================*
* arch_handle_pagefaults *
*===========================================================================*/
PUBLIC int arch_get_pagefault(who, addr, err)
endpoint_t *who;
vir_bytes *addr;
u32_t *err;
{
return sys_vmctl_get_pagefault_i386(who, addr, err);
}

View file

@ -74,7 +74,7 @@ FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
PUBLIC int main(void)
{
message msg;
int result, who_e;
int result, who_e, rcv_sts;
sigset_t sigset;
/* SEF local startup. */
@ -92,12 +92,12 @@ PUBLIC int main(void)
}
SANITYCHECK(SCL_DETAIL);
if ((r=sef_receive(ANY, &msg)) != OK)
panic("sef_receive() error: %d", r);
if ((r=sef_receive_status(ANY, &msg, &rcv_sts)) != OK)
panic("sef_receive_status() error: %d", r);
SANITYCHECK(SCL_DETAIL);
if(msg.m_type & NOTIFY_MESSAGE) {
if (is_ipc_notify(rcv_sts)) {
/* Unexpected notify(). */
printf("VM: ignoring notify() from %d\n", msg.m_source);
continue;
@ -105,16 +105,30 @@ PUBLIC int main(void)
who_e = msg.m_source;
c = CALLNUMBER(msg.m_type);
result = ENOSYS; /* Out of range or restricted calls return this. */
if(c < 0 || !vm_calls[c].vmc_func) {
if (msg.m_type == VM_PAGEFAULT) {
if (!IPC_STATUS_FLAGS_TEST(rcv_sts, IPC_FLG_MSG_FROM_KERNEL)) {
printf("VM: process %d faked VM_PAGEFAULT "
"message!\n", msg.m_source);
}
do_pagefaults(&msg);
/*
* do not reply to this call, the caller is unblocked by
* a sys_vmctl() call in do_pagefaults if success. VM panics
* otherwise
*/
continue;
} else if(c < 0 || !vm_calls[c].vmc_func) {
printf("VM: out of range or missing callnr %d from %d\n",
msg.m_type, who_e);
} else if (vm_acl_ok(who_e, c) != OK) {
printf("VM: unauthorized %s by %d\n",
vm_calls[c].vmc_name, who_e);
} else {
SANITYCHECK(SCL_FUNCTIONS);
result = vm_calls[c].vmc_func(&msg);
SANITYCHECK(SCL_FUNCTIONS);
if (vm_acl_ok(who_e, c) != OK) {
printf("VM: unauthorized %s by %d\n",
vm_calls[c].vmc_name, who_e);
} else {
SANITYCHECK(SCL_FUNCTIONS);
result = vm_calls[c].vmc_func(&msg);
SANITYCHECK(SCL_FUNCTIONS);
}
}
/* Send reply message, unless the return code is SUSPEND,
@ -381,10 +395,6 @@ PRIVATE void sef_cb_signal_handler(int signo)
case SIGKMEM:
do_memory();
break;
/* There is a pending page fault request from the kernel. */
case SIGKPF:
do_pagefaults();
break;
}
}

View file

@ -51,77 +51,73 @@ char *pf_errstr(u32_t err)
/*===========================================================================*
* do_pagefaults *
*===========================================================================*/
PUBLIC void do_pagefaults(void)
PUBLIC void do_pagefaults(message *m)
{
endpoint_t ep;
u32_t addr, err;
endpoint_t ep = m->m_source;
u32_t addr = m->VPF_ADDR;
u32_t err = m->VPF_FLAGS;
struct vmproc *vmp;
int r, s;
while((r=arch_get_pagefault(&ep, &addr, &err)) == OK) {
struct vir_region *region;
vir_bytes offset;
int p, wr = PFERR_WRITE(err);
struct vir_region *region;
vir_bytes offset;
int p, wr = PFERR_WRITE(err);
if(vm_isokendpt(ep, &p) != OK)
panic("do_pagefaults: endpoint wrong: %d", ep);
if(vm_isokendpt(ep, &p) != OK)
panic("do_pagefaults: endpoint wrong: %d", ep);
vmp = &vmproc[p];
assert(vmp->vm_flags & VMF_INUSE);
vmp = &vmproc[p];
assert(vmp->vm_flags & VMF_INUSE);
/* See if address is valid at all. */
if(!(region = map_lookup(vmp, addr))) {
assert(PFERR_NOPAGE(err));
printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx %s\n",
/* See if address is valid at all. */
if(!(region = map_lookup(vmp, addr))) {
assert(PFERR_NOPAGE(err));
printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx %s\n",
ep, arch_map2vir(vmp, addr), pf_errstr(err));
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
continue;
}
/* Make sure this isn't a region that isn't supposed
* to cause pagefaults.
*/
assert(!(region->flags & VR_NOPF));
/* We do not allow shared memory to cause pagefaults.
* These pages have to be pre-allocated.
*/
assert(!(region->flags & VR_SHARED));
/* If process was writing, see if it's writable. */
if(!(region->flags & VR_WRITABLE) && wr) {
printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
ep, arch_map2vir(vmp, addr), pf_errstr(err));
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
continue;
}
assert(addr >= region->vaddr);
offset = addr - region->vaddr;
/* Access is allowed; handle it. */
if((r=map_pf(vmp, region, offset, wr)) != OK) {
printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
continue;
}
/* Pagefault is handled, so now reactivate the process. */
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
return;
}
return;
/* Make sure this isn't a region that isn't supposed
* to cause pagefaults.
*/
assert(!(region->flags & VR_NOPF));
/* We do not allow shared memory to cause pagefaults.
* These pages have to be pre-allocated.
*/
assert(!(region->flags & VR_SHARED));
/* If process was writing, see if it's writable. */
if(!(region->flags & VR_WRITABLE) && wr) {
printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
ep, arch_map2vir(vmp, addr), pf_errstr(err));
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
return;
}
assert(addr >= region->vaddr);
offset = addr - region->vaddr;
/* Access is allowed; handle it. */
if((r=map_pf(vmp, region, offset, wr)) != OK) {
printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
panic("sys_kill failed: %d", s);
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
return;
}
/* Pagefault is handled, so now reactivate the process. */
if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, r)) != OK)
panic("do_pagefaults: sys_vmctl failed: %d", ep);
}
/*===========================================================================*

View file

@ -90,7 +90,7 @@ _PROTOTYPE(int do_shared_unmap, (message *m) );
_PROTOTYPE(int do_get_refcount, (message *m) );
/* pagefaults.c */
_PROTOTYPE( void do_pagefaults, (void) );
_PROTOTYPE( void do_pagefaults, (message *m) );
_PROTOTYPE( void do_memory, (void) );
_PROTOTYPE( char *pf_errstr, (u32_t err));
_PROTOTYPE( int handle_memory, (struct vmproc *vmp, vir_bytes mem,
@ -115,9 +115,6 @@ _PROTOTYPE( int vm_addrok, (void *vir, int write) );
_PROTOTYPE( void pt_sanitycheck, (pt_t *pt, char *file, int line) );
#endif
/* $(ARCH)/pagefaults.c */
_PROTOTYPE( int arch_get_pagefault, (endpoint_t *who, vir_bytes *addr, u32_t *err));
/* slaballoc.c */
_PROTOTYPE(void *slaballoc,(int bytes));
_PROTOTYPE(void slabfree,(void *mem, int bytes));