minix/servers/vm/main.c
Cristiano Giuffrida cb176df60f New RS and new signal handling for system processes.
UPDATING INFO:
20100317:
        /usr/src/etc/system.conf updated to ignore default kernel calls: copy
        it (or merge it) to /etc/system.conf.
        The hello driver (/dev/hello) added to the distribution:
        # cd /usr/src/commands/scripts && make clean install
        # cd /dev && MAKEDEV hello

KERNEL CHANGES:
- Generic signal handling support. The kernel no longer assumes PM as a signal
manager for every process. The signal manager of a given process can now be
specified in its privilege slot. When a signal has to be delivered, the kernel
performs the lookup and forwards the signal to the appropriate signal manager.
PM is the default signal manager for user processes, RS is the default signal
manager for system processes. To enable ptrace()ing for system processes, it
is sufficient to change the default signal manager to PM. This will temporarily
disable crash recovery, though.
- sys_exit() is now split into sys_exit() (i.e. exit() for system processes,
which generates a self-termination signal), and sys_clear() (i.e. used by PM
to ask the kernel to clear a process slot when a process exits).
- Added a new kernel call (i.e. sys_update()) to swap two process slots and
implement live update.

PM CHANGES:
- Posix signal handling is no longer allowed for system processes. System
signals are split into two fixed categories: termination and non-termination
signals. When a non-termination signaled is processed, PM transforms the signal
into an IPC message and delivers the message to the system process. When a
termination signal is processed, PM terminates the process.
- PM no longer assumes itself as the signal manager for system processes. It now
makes sure that every system signal goes through the kernel before being
actually processes. The kernel will then dispatch the signal to the appropriate
signal manager which may or may not be PM.

SYSLIB CHANGES:
- Simplified SEF init and LU callbacks.
- Added additional predefined SEF callbacks to debug crash recovery and
live update.
- Fixed a temporary ack in the SEF init protocol. SEF init reply is now
completely synchronous.
- Added SEF signal event type to provide a uniform interface for system
processes to deal with signals. A sef_cb_signal_handler() callback is
available for system processes to handle every received signal. A
sef_cb_signal_manager() callback is used by signal managers to process
system signals on behalf of the kernel.
- Fixed a few bugs with memory mapping and DS.

VM CHANGES:
- Page faults and memory requests coming from the kernel are now implemented
using signals.
- Added a new VM call to swap two process slots and implement live update.
- The call is used by RS at update time and in turn invokes the kernel call
sys_update().

RS CHANGES:
- RS has been reworked with a better functional decomposition.
- Better kernel call masks. com.h now defines the set of very basic kernel calls
every system service is allowed to use. This makes system.conf simpler and
easier to maintain. In addition, this guarantees a higher level of isolation
for system libraries that use one or more kernel calls internally (e.g. printf).
- RS is the default signal manager for system processes. By default, RS
intercepts every signal delivered to every system process. This makes crash
recovery possible before bringing PM and friends in the loop.
- RS now supports fast rollback when something goes wrong while initializing
the new version during a live update.
- Live update is now implemented by keeping the two versions side-by-side and
swapping the process slots when the old version is ready to update.
- Crash recovery is now implemented by keeping the two versions side-by-side
and cleaning up the old version only when the recovery process is complete.

DS CHANGES:
- Fixed a bug when the process doing ds_publish() or ds_delete() is not known
by DS.
- Fixed the completely broken support for strings. String publishing is now
implemented in the system library and simply wraps publishing of memory ranges.
Ideally, we should adopt a similar approach for other data types as well.
- Test suite fixed.

DRIVER CHANGES:
- The hello driver has been added to the Minix distribution to demonstrate basic
live update and crash recovery functionalities.
- Other drivers have been adapted to conform the new SEF interface.
2010-03-17 01:15:29 +00:00

427 lines
11 KiB
C

#define _POSIX_SOURCE 1
#define _MINIX 1
#define _SYSTEM 1
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/keymap.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/const.h>
#include <minix/bitmap.h>
#include <minix/crtso.h>
#include <minix/rs.h>
#include <errno.h>
#include <string.h>
#include <env.h>
#include <stdio.h>
#include <memory.h>
#define _MAIN 1
#include "glo.h"
#include "proto.h"
#include "util.h"
#include "vm.h"
#include "sanitycheck.h"
extern int missing_spares;
#include <machine/archtypes.h>
#include "../../kernel/const.h"
#include "../../kernel/config.h"
#include "../../kernel/proc.h"
#include <signal.h>
/* Table of calls and a macro to test for being in range. */
struct {
int (*vmc_func)(message *); /* Call handles message. */
char *vmc_name; /* Human-readable string. */
} vm_calls[NR_VM_CALLS];
/* Macro to verify call range and map 'high' range to 'base' range
* (starting at 0) in one. Evaluates to zero-based call number if call
* number is valid, returns -1 otherwise.
*/
#define CALLNUMBER(c) (((c) >= VM_RQ_BASE && \
(c) < VM_RQ_BASE + ELEMENTS(vm_calls)) ? \
((c) - VM_RQ_BASE) : -1)
FORWARD _PROTOTYPE(int map_service, (struct rprocpub *rpub));
FORWARD _PROTOTYPE(int vm_acl_ok, (endpoint_t caller, int call));
extern int unmap_ok;
/* SEF functions and variables. */
FORWARD _PROTOTYPE( void sef_local_startup, (void) );
FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
/*===========================================================================*
* main *
*===========================================================================*/
PUBLIC int main(void)
{
message msg;
int result, who_e;
sigset_t sigset;
/* SEF local startup. */
sef_local_startup();
/* This is VM's main loop. */
while (TRUE) {
int r, c;
SANITYCHECK(SCL_TOP);
if(missing_spares > 0) {
pt_cycle(); /* pagetable code wants to be called */
}
SANITYCHECK(SCL_DETAIL);
if ((r=sef_receive(ANY, &msg)) != OK)
panic("sef_receive() error: %d", r);
SANITYCHECK(SCL_DETAIL);
if(msg.m_type & NOTIFY_MESSAGE) {
/* Unexpected notify(). */
printf("VM: ignoring notify() from %d\n", msg.m_source);
continue;
}
who_e = msg.m_source;
c = CALLNUMBER(msg.m_type);
result = ENOSYS; /* Out of range or restricted calls return this. */
if(c < 0 || !vm_calls[c].vmc_func) {
printf("VM: out of range or missing callnr %d from %d\n",
msg.m_type, who_e);
} else if (vm_acl_ok(who_e, c) != OK) {
printf("VM: unauthorized %s by %d\n",
vm_calls[c].vmc_name, who_e);
} else {
SANITYCHECK(SCL_FUNCTIONS);
result = vm_calls[c].vmc_func(&msg);
SANITYCHECK(SCL_FUNCTIONS);
}
/* Send reply message, unless the return code is SUSPEND,
* which is a pseudo-result suppressing the reply message.
*/
if(result != SUSPEND) {
SANITYCHECK(SCL_DETAIL);
msg.m_type = result;
if((r=send(who_e, &msg)) != OK) {
printf("VM: couldn't send %d to %d (err %d)\n",
msg.m_type, who_e, r);
panic("send() error");
}
SANITYCHECK(SCL_DETAIL);
}
SANITYCHECK(SCL_DETAIL);
}
return(OK);
}
/*===========================================================================*
* sef_local_startup *
*===========================================================================*/
PRIVATE void sef_local_startup()
{
/* Register init callbacks. */
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_restart(sef_cb_init_fail);
/* No live update support for now. */
/* Register signal callbacks. */
sef_setcb_signal_handler(sef_cb_signal_handler);
/* Let SEF perform startup. */
sef_startup();
}
/*===========================================================================*
* sef_cb_init_fresh *
*===========================================================================*/
PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
{
/* Initialize the vm server. */
int s, i;
int click, clicksforgotten = 0;
struct memory mem_chunks[NR_MEMS];
struct boot_image image[NR_BOOT_PROCS];
struct boot_image *ip;
struct rprocpub rprocpub[NR_BOOT_PROCS];
phys_bytes limit = 0;
#if SANITYCHECKS
incheck = nocheck = 0;
FIXME("VM SANITYCHECKS are on");
#endif
vm_paged = 1;
env_parse("vm_paged", "d", 0, &vm_paged, 0, 1);
#if SANITYCHECKS
env_parse("vm_sanitychecklevel", "d", 0, &vm_sanitychecklevel, 0, SCL_MAX);
#endif
/* Get chunks of available memory. */
get_mem_chunks(mem_chunks);
/* Initialize VM's process table. Request a copy of the system
* image table that is defined at the kernel level to see which
* slots to fill in.
*/
if (OK != (s=sys_getimage(image)))
panic("couldn't get image table: %d", s);
/* Set table to 0. This invalidates all slots (clear VMF_INUSE). */
memset(vmproc, 0, sizeof(vmproc));
for(i = 0; i < ELEMENTS(vmproc); i++) {
vmproc[i].vm_slot = i;
}
/* Walk through boot-time system processes that are alive
* now and make valid slot entries for them.
*/
for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) {
phys_bytes proclimit;
struct vmproc *vmp;
if(ip->proc_nr >= _NR_PROCS) { panic("proc: %d", ip->proc_nr); }
if(ip->proc_nr < 0 && ip->proc_nr != SYSTEM) continue;
#define GETVMP(v, nr) \
if(nr >= 0) { \
vmp = &vmproc[ip->proc_nr]; \
} else if(nr == SYSTEM) { \
vmp = &vmproc[VMP_SYSTEM]; \
} else { \
panic("init: crazy proc_nr: %d", nr); \
}
/* Initialize normal process table slot or special SYSTEM
* table slot. Kernel memory is already reserved.
*/
GETVMP(vmp, ip->proc_nr);
/* reset fields as if exited */
clear_proc(vmp);
/* Get memory map for this process from the kernel. */
if ((s=get_mem_map(ip->proc_nr, vmp->vm_arch.vm_seg)) != OK)
panic("couldn't get process mem_map: %d", s);
/* Remove this memory from the free list. */
reserve_proc_mem(mem_chunks, vmp->vm_arch.vm_seg);
/* Set memory limit. */
proclimit = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_phys +
vmp->vm_arch.vm_seg[S].mem_len) - 1;
if(proclimit > limit)
limit = proclimit;
vmp->vm_flags = VMF_INUSE;
vmp->vm_endpoint = ip->endpoint;
vmp->vm_stacktop =
CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len);
if (vmp->vm_arch.vm_seg[T].mem_len != 0)
vmp->vm_flags |= VMF_SEPARATE;
}
/* Architecture-dependent initialization. */
pt_init(limit);
/* Initialize tables to all physical memory. */
mem_init(mem_chunks);
meminit_done = 1;
/* Give these processes their own page table. */
for (ip = &image[0]; ip < &image[NR_BOOT_PROCS]; ip++) {
int s;
struct vmproc *vmp;
vir_bytes old_stacktop, old_stack;
if(ip->proc_nr < 0) continue;
GETVMP(vmp, ip->proc_nr);
if(!(ip->flags & PROC_FULLVM))
continue;
old_stack =
vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len -
vmp->vm_arch.vm_seg[D].mem_len;
if(pt_new(&vmp->vm_pt) != OK)
panic("VM: no new pagetable");
#define BASICSTACK VM_PAGE_SIZE
old_stacktop = CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len);
if(sys_vmctl(vmp->vm_endpoint, VMCTL_INCSP,
VM_STACKTOP - old_stacktop) != OK) {
panic("VM: vmctl for new stack failed");
}
FREE_MEM(vmp->vm_arch.vm_seg[D].mem_phys +
vmp->vm_arch.vm_seg[D].mem_len,
old_stack);
if(proc_new(vmp,
VM_PROCSTART,
CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_len),
CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_len),
BASICSTACK,
CLICK2ABS(vmp->vm_arch.vm_seg[S].mem_vir +
vmp->vm_arch.vm_seg[S].mem_len -
vmp->vm_arch.vm_seg[D].mem_len) - BASICSTACK,
CLICK2ABS(vmp->vm_arch.vm_seg[T].mem_phys),
CLICK2ABS(vmp->vm_arch.vm_seg[D].mem_phys),
VM_STACKTOP) != OK) {
panic("failed proc_new for boot process");
}
}
/* Set up table of calls. */
#define CALLMAP(code, func) { int i; \
if((i=CALLNUMBER(code)) < 0) { panic(#code " invalid: %d", (code)); } \
if(i >= NR_VM_CALLS) { panic(#code " invalid: %d", (code)); } \
vm_calls[i].vmc_func = (func); \
vm_calls[i].vmc_name = #code; \
}
/* Set call table to 0. This invalidates all calls (clear
* vmc_func).
*/
memset(vm_calls, 0, sizeof(vm_calls));
/* Basic VM calls. */
CALLMAP(VM_MMAP, do_mmap);
CALLMAP(VM_MUNMAP, do_munmap);
CALLMAP(VM_MUNMAP_TEXT, do_munmap);
CALLMAP(VM_MAP_PHYS, do_map_phys);
CALLMAP(VM_UNMAP_PHYS, do_unmap_phys);
/* Calls from PM. */
CALLMAP(VM_EXIT, do_exit);
CALLMAP(VM_FORK, do_fork);
CALLMAP(VM_BRK, do_brk);
CALLMAP(VM_EXEC_NEWMEM, do_exec_newmem);
CALLMAP(VM_PUSH_SIG, do_push_sig);
CALLMAP(VM_WILLEXIT, do_willexit);
CALLMAP(VM_ADDDMA, do_adddma);
CALLMAP(VM_DELDMA, do_deldma);
CALLMAP(VM_GETDMA, do_getdma);
CALLMAP(VM_NOTIFY_SIG, do_notify_sig);
/* Calls from RS */
CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
CALLMAP(VM_RS_UPDATE, do_rs_update);
/* Generic calls. */
CALLMAP(VM_REMAP, do_remap);
CALLMAP(VM_GETPHYS, do_get_phys);
CALLMAP(VM_SHM_UNMAP, do_shared_unmap);
CALLMAP(VM_GETREF, do_get_refcount);
CALLMAP(VM_INFO, do_info);
CALLMAP(VM_QUERY_EXIT, do_query_exit);
/* Sanity checks */
if(find_kernel_top() >= VM_PROCSTART)
panic("kernel loaded too high");
/* Initialize the structures for queryexit */
init_query_exit();
/* Unmap our own low pages. */
unmap_ok = 1;
_minix_unmapzero();
/* Map all the services in the boot image. */
if((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
(vir_bytes) rprocpub, sizeof(rprocpub), S)) != OK) {
panic("sys_safecopyfrom failed: %d", s);
}
for(i=0;i < NR_BOOT_PROCS;i++) {
if(rprocpub[i].in_use) {
if((s = map_service(&rprocpub[i])) != OK) {
panic("unable to map service: %d", s);
}
}
}
return(OK);
}
/*===========================================================================*
* sef_cb_signal_handler *
*===========================================================================*/
PRIVATE void sef_cb_signal_handler(int signo)
{
/* Check for known kernel signals, ignore anything else. */
switch(signo) {
/* There is a pending memory request from the kernel. */
case SIGKMEM:
do_memory();
break;
/* There is a pending page fault request from the kernel. */
case SIGKPF:
do_pagefaults();
break;
}
}
/*===========================================================================*
* map_service *
*===========================================================================*/
PRIVATE int map_service(rpub)
struct rprocpub *rpub;
{
/* Map a new service by initializing its call mask. */
int r, proc_nr;
if ((r = vm_isokendpt(rpub->endpoint, &proc_nr)) != OK) {
return r;
}
/* Copy the call mask. */
memcpy(&vmproc[proc_nr].vm_call_mask, &rpub->vm_call_mask,
sizeof(vmproc[proc_nr].vm_call_mask));
return(OK);
}
/*===========================================================================*
* vm_acl_ok *
*===========================================================================*/
PRIVATE int vm_acl_ok(endpoint_t caller, int call)
{
int n, r;
if ((r = vm_isokendpt(caller, &n)) != OK)
panic("VM: from strange source: %d", caller);
/* See if the call is allowed. */
if (!GET_BIT(vmproc[n].vm_call_mask, call)) {
return EPERM;
}
return OK;
}