minix/servers/vm/main.c
Tomas Hruby f51eea4b32 Changed pagefault delivery to VM
this patch changes the way pagefaults are delivered to VM. It adopts
the same model as the out-of-quantum messages sent by kernel to a
scheduler.

- everytime a userspace pagefault occurs, kernel creates a message
  which is sent to VM on behalf of the faulting process

- the process is blocked on delivery to VM in the standard IPC code
  instead of waiting in a spacial in-kernel queue (stack) and is not
  runnable until VM tell kernel that the pagefault is resolved and is
  free to clear the RTS_PAGEFAULT flag.

- VM does not need call kernel and poll the pagefault information
  which saves many (1/2?) calls and kernel calls that return "no more
  data"

- VM notification by kernel does not need to use signals

- each entry in proc table is by 12 bytes smaller (~3k save)
2010-04-26 23:21:26 +00:00

438 lines
12 KiB
C

#define _POSIX_SOURCE 1
#define _MINIX 1
#define _SYSTEM 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/keymap.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/const.h>
#include <minix/bitmap.h>
#include <minix/crtso.h>
#include <minix/rs.h>
#include <errno.h>
#include <string.h>
#include <env.h>
#include <stdio.h>
#include <assert.h>
#include <memory.h>
#define _MAIN 1
#include "glo.h"
#include "proto.h"
#include "util.h"
#include "vm.h"
#include "sanitycheck.h"
extern int missing_spares;
#include <machine/archtypes.h>
#include "kernel/const.h"
#include "kernel/config.h"
#include "kernel/proc.h"
#include <signal.h>
/* Table of calls and a macro to test for being in range. */
struct {
int (*vmc_func)(message *); /* Call handles message. */
char *vmc_name; /* Human-readable string. */
} vm_calls[NR_VM_CALLS];
/* Macro to verify call range and map 'high' range to 'base' range
* (starting at 0) in one. Evaluates to zero-based call number if call
* number is valid, returns -1 otherwise.
*/
#define CALLNUMBER(c) (((c) >= VM_RQ_BASE && \
(c) < VM_RQ_BASE + ELEMENTS(vm_calls)) ? \
((c) - VM_RQ_BASE) : -1)
FORWARD _PROTOTYPE(int map_service, (struct rprocpub *rpub));
FORWARD _PROTOTYPE(int vm_acl_ok, (endpoint_t caller, int call));
extern int unmap_ok;
/* SEF functions and variables. */
FORWARD _PROTOTYPE( void sef_local_startup, (void) );
FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
/*===========================================================================*
* main *
*===========================================================================*/
PUBLIC int main(void)
{
message msg;
int result, who_e, rcv_sts;
sigset_t sigset;
/* SEF local startup. */
sef_local_startup();
SANITYCHECK(SCL_TOP);
/* This is VM's main loop. */
while (TRUE) {
int r, c;
SANITYCHECK(SCL_TOP);
if(missing_spares > 0) {
pt_cycle(); /* pagetable code wants to be called */
}
SANITYCHECK(SCL_DETAIL);
if ((r=sef_receive_status(ANY, &msg, &rcv_sts)) != OK)
panic("sef_receive_status() error: %d", r);
SANITYCHECK(SCL_DETAIL);
if (is_ipc_notify(rcv_sts)) {
/* Unexpected notify(). */
printf("VM: ignoring notify() from %d\n", msg.m_source);
continue;
}
who_e = msg.m_source;
c = CALLNUMBER(msg.m_type);
result = ENOSYS; /* Out of range or restricted calls return this. */
if (msg.m_type == VM_PAGEFAULT) {
if (!IPC_STATUS_FLAGS_TEST(rcv_sts, IPC_FLG_MSG_FROM_KERNEL)) {
printf("VM: process %d faked VM_PAGEFAULT "
"message!\n", msg.m_source);
}
do_pagefaults(&msg);
/*
* do not reply to this call, the caller is unblocked by
* a sys_vmctl() call in do_pagefaults if success. VM panics
* otherwise
*/
continue;
} else if(c < 0 || !vm_calls[c].vmc_func) {
printf("VM: out of range or missing callnr %d from %d\n",
msg.m_type, who_e);
} else {
if (vm_acl_ok(who_e, c) != OK) {
printf("VM: unauthorized %s by %d\n",
vm_calls[c].vmc_name, who_e);
} else {
SANITYCHECK(SCL_FUNCTIONS);
result = vm_calls[c].vmc_func(&msg);
SANITYCHECK(SCL_FUNCTIONS);
}
}
/* Send reply message, unless the return code is SUSPEND,
* which is a pseudo-result suppressing the reply message.
*/
if(result != SUSPEND) {
SANITYCHECK(SCL_DETAIL);
msg.m_type = result;
if((r=send(who_e, &msg)) != OK) {
printf("VM: couldn't send %d to %d (err %d)\n",
msg.m_type, who_e, r);
panic("send() error");
}
SANITYCHECK(SCL_DETAIL);
}
SANITYCHECK(SCL_DETAIL);
}
return(OK);
}
/*===========================================================================*
* sef_local_startup *
*===========================================================================*/
PRIVATE void sef_local_startup()
{
/* Register init callbacks. */
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_restart(sef_cb_init_fail);
/* No live update support for now. */
/* Register signal callbacks. */
sef_setcb_signal_handler(sef_cb_signal_handler);
/* Let SEF perform startup. */
sef_startup();
}
/*===========================================================================*
* sef_cb_init_fresh *
*===========================================================================*/
PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
{
/* Initialize the vm server. */
int s, i;
int click, clicksforgotten = 0;
struct memory mem_chunks[NR_MEMS];
struct boot_image image[NR_BOOT_PROCS];
struct boot_image *ip;
struct rprocpub rprocpub[NR_BOOT_PROCS];
phys_bytes limit = 0;
#if SANITYCHECKS
incheck = nocheck = 0;
#endif
vm_paged = 1;
env_parse("vm_paged", "d", 0, &vm_paged, 0, 1);
#if SANITYCHECKS
env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX);
#endif
/* Get chunks of available memory. */
get_mem_chunks(mem_chunks);
/* Initialize VM's process table. Request a copy of the system
* image table that is defined at the kernel level to see which
* slots to fill in.
*/
if (OK != (s=sys_getimage(image)))
panic("couldn't get image table: %d", s);
/* Set table to 0. This invalidates all slots (clear VMF_INUSE). */
memset(vmproc, 0, sizeof(vmproc));
for(i = 0; i < ELEMENTS(vmproc); i++) {
vmproc[i].vm_slot = i;
}
/* Walk through boot-time system processes that are alive
* now and make valid slot entries for them.
*/
for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) {
phys_bytes proclimit;
struct vmproc *vmp;
if(ip->proc_nr >= _NR_PROCS) { panic("proc: %d", ip->proc_nr); }
if(ip->proc_nr < 0 && ip->proc_nr != SYSTEM) continue;
#define GETVMP(v, nr) \
if(nr >= 0) { \
vmp = &vmproc[ip->proc_nr]; \
} else if(nr == SYSTEM) { \
vmp = &vmproc[VMP_SYSTEM]; \
} else { \
panic("init: crazy proc_nr: %d", nr); \
}
/* Initialize normal process table slot or special SYSTEM
* table slot. Kernel memory is already reserved.
*/
GETVMP(vmp, ip->proc_nr);
/* reset fields as if exited */
clear_proc(vmp);
/* Get memory map for this process from the kernel. */
if ((s=get_mem_map(ip->proc_nr, vmp->vm_arch.vm_seg)) != OK)
panic("couldn't get process mem_map: %d", s);
/* Remove this memory from the free list. */
reserve_proc_mem(mem_chunks, vmp->vm_arch.vm_seg);
/* Set memory limit. */
proclimit = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_phys +
vmp->vm_arch.vm_seg[S].mem_len) - 1;
if(proclimit > limit)
limit = proclimit;
vmp->vm_flags = VMF_INUSE;
vmp->vm_endpoint = ip->endpoint;
vmp->vm_stacktop =
CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len);
if (vmp->vm_arch.vm_seg[T].mem_len != 0)
vmp->vm_flags |= VMF_SEPARATE;
}
/* Architecture-dependent initialization. */
pt_init(limit);
/* Initialize tables to all physical memory. */
mem_init(mem_chunks);
meminit_done = 1;
/* Give these processes their own page table. */
for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) {
int s;
struct vmproc *vmp;
vir_bytes old_stacktop, old_stack;
if(ip->proc_nr < 0) continue;
GETVMP(vmp, ip->proc_nr);
if(!(ip->flags & PROC_FULLVM))
continue;
old_stack =
vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len -
vmp->vm_arch.vm_seg[D].mem_len;
if(pt_new(&vmp->vm_pt) != OK)
panic("VM: no new pagetable");
#define BASICSTACK VM_PAGE_SIZE
old_stacktop = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len);
if(sys_vmctl(vmp->vm_endpoint, VMCTL_INCSP,
VM_STACKTOP - old_stacktop) != OK) {
panic("VM: vmctl for new stack failed");
}
free_mem(vmp->vm_arch.vm_seg[D].mem_phys +
vmp->vm_arch.vm_seg[D].mem_len,
old_stack);
if(proc_new(vmp,
VM_PROCSTART,
CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_len),
CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_len),
BASICSTACK,
CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len -
vmp->vm_arch.vm_seg[D].mem_len) - BASICSTACK,
CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys),
CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys),
VM_STACKTOP, 0) != OK) {
panic("failed proc_new for boot process");
}
}
/* Set up table of calls. */
#define CALLMAP(code, func) { int i; \
if((i=CALLNUMBER(code)) < 0) { panic(#code " invalid: %d", (code)); } \
if(i >= NR_VM_CALLS) { panic(#code " invalid: %d", (code)); } \
vm_calls[i].vmc_func = (func); \
vm_calls[i].vmc_name = #code; \
}
/* Set call table to 0. This invalidates all calls (clear
* vmc_func).
*/
memset(vm_calls, 0, sizeof(vm_calls));
/* Basic VM calls. */
CALLMAP(VM_MMAP, do_mmap);
CALLMAP(VM_MUNMAP, do_munmap);
CALLMAP(VM_MUNMAP_TEXT, do_munmap);
CALLMAP(VM_MAP_PHYS, do_map_phys);
CALLMAP(VM_UNMAP_PHYS, do_unmap_phys);
/* Calls from PM. */
CALLMAP(VM_EXIT, do_exit);
CALLMAP(VM_FORK, do_fork);
CALLMAP(VM_BRK, do_brk);
CALLMAP(VM_EXEC_NEWMEM, do_exec_newmem);
CALLMAP(VM_PUSH_SIG, do_push_sig);
CALLMAP(VM_WILLEXIT, do_willexit);
CALLMAP(VM_ADDDMA, do_adddma);
CALLMAP(VM_DELDMA, do_deldma);
CALLMAP(VM_GETDMA, do_getdma);
CALLMAP(VM_NOTIFY_SIG, do_notify_sig);
/* Calls from RS */
CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
CALLMAP(VM_RS_UPDATE, do_rs_update);
/* Generic calls. */
CALLMAP(VM_REMAP, do_remap);
CALLMAP(VM_GETPHYS, do_get_phys);
CALLMAP(VM_SHM_UNMAP, do_shared_unmap);
CALLMAP(VM_GETREF, do_get_refcount);
CALLMAP(VM_INFO, do_info);
CALLMAP(VM_QUERY_EXIT, do_query_exit);
/* Sanity checks */
if(find_kernel_top() >= VM_PROCSTART)
panic("kernel loaded too high");
/* Initialize the structures for queryexit */
init_query_exit();
/* Unmap our own low pages. */
unmap_ok = 1;
_minix_unmapzero();
/* Map all the services in the boot image. */
if((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
(vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK) {
panic("sys_safecopyfrom failed: %d", s);
}
for(i=0;i < NR_BOOT_PROCS;i++) {
if(rprocpub[i].in_use) {
if((s = map_service(&rprocpub[i])) != OK) {
panic("unable to map service: %d", s);
}
}
}
return(OK);
}
/*===========================================================================*
* sef_cb_signal_handler *
*===========================================================================*/
PRIVATE void sef_cb_signal_handler(int signo)
{
/* Check for known kernel signals, ignore anything else. */
switch(signo) {
/* There is a pending memory request from the kernel. */
case SIGKMEM:
do_memory();
break;
}
}
/*===========================================================================*
* map_service *
*===========================================================================*/
PRIVATE int map_service(rpub)
struct rprocpub *rpub;
{
/* Map a new service by initializing its call mask. */
int r, proc_nr;
if ((r = vm_isokendpt(rpub->endpoint, &proc_nr)) != OK) {
return r;
}
/* Copy the call mask. */
memcpy(&vmproc[proc_nr].vm_call_mask, &rpub->vm_call_mask,
sizeof(vmproc[proc_nr].vm_call_mask));
return(OK);
}
/*===========================================================================*
* vm_acl_ok *
*===========================================================================*/
PRIVATE int vm_acl_ok(endpoint_t caller, int call)
{
int n, r;
if ((r = vm_isokendpt(caller, &n)) != OK)
panic("VM: from strange source: %d", caller);
/* See if the call is allowed. */
if (!GET_BIT(vmproc[n].vm_call_mask, call)) {
return EPERM;
}
return OK;
}