Crash recovery and live update support for VM.

This commit is contained in:
Cristiano Giuffrida 2010-07-20 23:03:52 +00:00
parent 0b364d00bc
commit 91a83fe265
7 changed files with 286 additions and 53 deletions

View file

@ -1058,7 +1058,8 @@
#define VM_RS_MEMCTL (VM_RQ_BASE+42)
# define VM_RS_CTL_ENDPT m1_i1
# define VM_RS_CTL_REQ m1_i2
# define VM_RS_MEM_PIN 0 /* pin memory */
# define VM_RS_MEM_PIN 0 /* pin memory */
# define VM_RS_MEM_MAKE_VM 1 /* make VM instance */
/* Total. */
#define NR_VM_CALLS 43

View file

@ -552,6 +552,19 @@ struct rproc *rp;
*/
setuid(0);
/* If this is a VM instance, let VM know now. */
if(rp->r_priv.s_flags & VM_SYS_PROC) {
if(rs_verbose)
printf("RS: informing VM of instance %s\n", srv_to_string(rp));
s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM);
if(s != OK) {
printf("vm_memctl failed: %d\n", s);
cleanup_service(rp);
return s;
}
}
/* Tell VM about allowed calls. */
if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) {
printf("RS: vm_set_priv failed: %d\n", s);

View file

@ -526,6 +526,119 @@ PRIVATE char *ptestr(u32_t pte)
return str;
}
/*===========================================================================*
* pt_map_in_range *
*===========================================================================*/
PUBLIC int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start, vir_bytes end)
{
/* Transfer all the mappings from the pt of the source process to the pt of
 * the destination process in the range specified.
 *
 * src_vmp: process whose page table is read.
 * dst_vmp: process whose page table is written.
 * start:   first virtual address of the range; must be page-aligned and
 *          must not lie below the proc_pde page-directory slot.
 * end:     last virtual address of the range (inclusive); must be
 *          page-aligned. A value of 0 selects VM_DATATOP.
 *
 * Only addresses for which both the source page-directory entry and the
 * source page-table entry are marked present are transferred; the raw
 * page-table entry is copied unchanged. The destination page table covering
 * each transferred address must already exist. Always returns OK.
 */
	int pde, pte;
	vir_bytes viraddr;
	pt_t *pt, *dst_pt;

	pt = &src_vmp->vm_pt;
	dst_pt = &dst_vmp->vm_pt;

	end = end ? end : VM_DATATOP;
	assert(start % I386_PAGE_SIZE == 0);
	assert(end % I386_PAGE_SIZE == 0);
	assert(I386_VM_PDE(start) >= proc_pde && start <= end);
	assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);

#if LU_DEBUG
	printf("VM: pt_map_in_range: src = %d, dst = %d\n",
		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
	printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
		start, I386_VM_PDE(start), I386_VM_PTE(start),
		end, I386_VM_PDE(end), I386_VM_PTE(end));
#endif

	/* Scan all page-table entries in the range. Every path through the
	 * loop body stops explicitly at VM_DATATOP instead of relying on the
	 * loop condition alone.
	 * NOTE(review): presumably this guards against viraddr wrapping around
	 * when incremented past VM_DATATOP - confirm against its definition.
	 */
	for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
		pde = I386_VM_PDE(viraddr);
		if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
			if(viraddr == VM_DATATOP) break;
			continue;
		}
		pte = I386_VM_PTE(viraddr);
		if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
			if(viraddr == VM_DATATOP) break;
			continue;
		}

		/* Transfer the mapping. The destination page table must have
		 * been allocated beforehand; catch a missing one here rather
		 * than writing through an invalid pointer.
		 */
		assert(dst_pt->pt_pt[pde]);
		dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];

		if(viraddr == VM_DATATOP) break;
	}

	return OK;
}
/*===========================================================================*
* pt_ptmap *
*===========================================================================*/
PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
/* Transfer mappings to page dir and page tables from source process and
* destination process. Make sure all the mappings are above the stack, not
* to corrupt valid mappings in the data segment of the destination process.
*/
int pde, r;
phys_bytes physaddr;
vir_bytes viraddr;
pt_t *pt;
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
pt = &src_vmp->vm_pt;
#if LU_DEBUG
printf("VM: pt_ptmap: src = %d, dst = %d\n",
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
/* Transfer mapping to the page directory. */
assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop);
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir);
physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
WMF_OVERWRITE)) != OK) {
return r;
}
#if LU_DEBUG
printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
viraddr, physaddr);
#endif
/* Scan all non-reserved page-directory entries. */
for(pde=proc_pde; pde < I386_VM_DIR_ENTRIES; pde++) {
if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
continue;
}
/* Transfer mapping to the page table. */
assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop);
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]);
physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
WMF_OVERWRITE)) != OK) {
return r;
}
}
#if LU_DEBUG
printf("VM: pt_ptmap: transferred mappings to page tables, pde range %d - %d\n",
proc_pde, I386_VM_DIR_ENTRIES - 1);
#endif
return OK;
}
/*===========================================================================*
* pt_writemap *
*===========================================================================*/
@ -920,7 +1033,12 @@ PUBLIC void pt_init(phys_bytes usedlimit)
/* Back to reality - this is where the stack actually is. */
vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;
/* Pretend VM stack top is the same as any regular process, not to
* have discrepancies with new VM instances later on.
*/
vmprocess->vm_stacktop = VM_STACKTOP;
/* All OK. */
return;
}

View file

@ -44,7 +44,8 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks,
_PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) );
_PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) );
_PROTOTYPE( int do_info, (message *) );
_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) );
_PROTOTYPE( int swap_proc_slot, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
_PROTOTYPE( int swap_proc_dyn_data, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
/* exit.c */
_PROTOTYPE( void clear_proc, (struct vmproc *vmp) );
@ -101,6 +102,9 @@ _PROTOTYPE( void pt_init_mem, (void) );
_PROTOTYPE( void pt_check, (struct vmproc *vmp) );
_PROTOTYPE( int pt_new, (pt_t *pt) );
_PROTOTYPE( void pt_free, (pt_t *pt) );
_PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp,
vir_bytes start, vir_bytes end) );
_PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) );
_PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end,
u32_t flags, int verify));
_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr,

View file

@ -21,6 +21,7 @@
#include <env.h>
#include <stdio.h>
#include <assert.h>
#include <memory.h>
#include "glo.h"
#include "proto.h"
@ -61,11 +62,26 @@ PUBLIC int do_rs_set_priv(message *m)
PUBLIC int do_rs_update(message *m_ptr)
{
endpoint_t src_e, dst_e, reply_e;
int src_p, dst_p;
struct vmproc *src_vmp, *dst_vmp;
struct vir_region *vr;
int r;
src_e = m_ptr->VM_RS_SRC_ENDPT;
dst_e = m_ptr->VM_RS_DST_ENDPT;
/* Lookup slots for source and destination process. */
if(vm_isokendpt(src_e, &src_p) != OK) {
printf("do_rs_update: bad src endpoint %d\n", src_e);
return EINVAL;
}
src_vmp = &vmproc[src_p];
if(vm_isokendpt(dst_e, &dst_p) != OK) {
printf("do_rs_update: bad dst endpoint %d\n", dst_e);
return EINVAL;
}
dst_vmp = &vmproc[dst_p];
/* Let the kernel do the update first. */
r = sys_update(src_e, dst_e);
if(r != OK) {
@ -73,15 +89,21 @@ PUBLIC int do_rs_update(message *m_ptr)
}
/* Do the update in VM now. */
r = swap_proc(src_e, dst_e);
r = swap_proc_slot(src_vmp, dst_vmp);
if(r != OK) {
return r;
}
r = swap_proc_dyn_data(src_vmp, dst_vmp);
if(r != OK) {
return r;
}
pt_bind(&src_vmp->vm_pt, src_vmp);
pt_bind(&dst_vmp->vm_pt, dst_vmp);
/* Reply, update-aware. */
reply_e = m_ptr->m_source;
if(reply_e == src_e) reply_e = dst_e;
if(reply_e == dst_e) reply_e = src_e;
else if(reply_e == dst_e) reply_e = src_e;
m_ptr->m_type = OK;
r = send(reply_e, m_ptr);
if(r != OK) {
@ -91,6 +113,55 @@ PUBLIC int do_rs_update(message *m_ptr)
return SUSPEND;
}
/*===========================================================================*
* rs_memctl_make_vm_instance *
*===========================================================================*/
PRIVATE int rs_memctl_make_vm_instance(struct vmproc *new_vm_vmp)
{
/* Prepare the process in new_vm_vmp to act as a new VM instance.
 *
 * Steps, in order:
 *  - copy the stack top and data top settings from the running VM;
 *  - pin the memory of the new instance;
 *  - preallocate page tables for the entire address space, for both the
 *    running VM and the new instance;
 *  - map the running VM's page tables and the new instance's own page
 *    tables into the new instance.
 *
 * Returns OK on success. On failure the error of the failing step is
 * returned immediately; steps already performed are not undone.
 */
	int r;
	u32_t flags;
	int verify;
	struct vmproc *this_vm_vmp;

	this_vm_vmp = &vmproc[VM_PROC_NR];

	/* Copy settings from current VM. */
	new_vm_vmp->vm_stacktop = this_vm_vmp->vm_stacktop;
	new_vm_vmp->vm_arch.vm_data_top = this_vm_vmp->vm_arch.vm_data_top;

	/* Pin memory for the new VM instance. */
	r = map_pin_memory(new_vm_vmp);
	if(r != OK) {
		return r;
	}

	/* Preallocate page tables for the entire address space for both
	 * VM and the new VM instance.
	 */
	flags = 0;
	verify = FALSE;
	r = pt_ptalloc_in_range(&this_vm_vmp->vm_pt, 0, 0, flags, verify);
	if(r != OK) {
		return r;
	}
	r = pt_ptalloc_in_range(&new_vm_vmp->vm_pt, 0, 0, flags, verify);
	if(r != OK) {
		return r;
	}

	/* Let the new VM instance map VM's page tables and its own. */
	r = pt_ptmap(this_vm_vmp, new_vm_vmp);
	if(r != OK) {
		return r;
	}
	r = pt_ptmap(new_vm_vmp, new_vm_vmp);
	if(r != OK) {
		return r;
	}

	return OK;
}
/*===========================================================================*
* do_rs_memctl *
*===========================================================================*/
@ -116,7 +187,9 @@ PUBLIC int do_rs_memctl(message *m_ptr)
case VM_RS_MEM_PIN:
r = map_pin_memory(vmp);
return r;
case VM_RS_MEM_MAKE_VM:
r = rs_memctl_make_vm_instance(vmp);
return r;
default:
printf("do_rs_memctl: bad request %d\n", req);
return EINVAL;

View file

@ -37,8 +37,6 @@
#include "kernel/type.h"
#include "kernel/proc.h"
#define SWAP_PROC_DEBUG 0
/*===========================================================================*
* get_mem_map *
*===========================================================================*/
@ -232,36 +230,16 @@ PUBLIC int do_info(message *m)
}
/*===========================================================================*
* swap_proc *
* swap_proc_slot *
*===========================================================================*/
PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
PUBLIC int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
struct vmproc *src_vmp, *dst_vmp;
struct vmproc orig_src_vmproc, orig_dst_vmproc;
int src_p, dst_p, r;
struct vir_region *vr;
/* Lookup slots for source and destination process. */
if(vm_isokendpt(src_e, &src_p) != OK) {
printf("swap_proc: bad src endpoint %d\n", src_e);
return EINVAL;
}
src_vmp = &vmproc[src_p];
if(vm_isokendpt(dst_e, &dst_p) != OK) {
printf("swap_proc: bad dst endpoint %d\n", dst_e);
return EINVAL;
}
dst_vmp = &vmproc[dst_p];
#if SWAP_PROC_DEBUG
printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n",
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
printf("swap_proc: map_printmap for source before swapping:\n");
map_printmap(src_vmp);
printf("swap_proc: map_printmap for destination before swapping:\n");
map_printmap(dst_vmp);
#if LU_DEBUG
printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
src_vmp->vm_endpoint, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
/* Save existing data. */
@ -278,7 +256,52 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
/* Preserve vir_region's parents. */
/* Preserve yielded blocks. */
src_vmp->vm_yielded_blocks = orig_src_vmproc.vm_yielded_blocks;
dst_vmp->vm_yielded_blocks = orig_dst_vmproc.vm_yielded_blocks;
#if LU_DEBUG
printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
src_vmp->vm_endpoint, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
return OK;
}
/*===========================================================================*
* swap_proc_dyn_data *
*===========================================================================*/
PUBLIC int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
struct vir_region *vr;
int is_vm;
int r;
is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
/* For VM, transfer memory regions above the stack first. */
if(is_vm) {
#if LU_DEBUG
printf("VM: swap_proc_dyn_data: tranferring regions above the stack from old VM (%d) to new VM (%d)\n",
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
r = pt_map_in_range(src_vmp, dst_vmp,
arch_vir2map(src_vmp, src_vmp->vm_stacktop), 0);
if(r != OK) {
printf("swap_proc_dyn_data: pt_map_in_range failed\n");
return r;
}
}
#if LU_DEBUG
printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
src_vmp->vm_endpoint, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
/* Swap vir_regions' parents. */
for(vr = src_vmp->vm_regions; vr; vr = vr->next) {
USE(vr, vr->parent = src_vmp;);
}
@ -286,25 +309,25 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
USE(vr, vr->parent = dst_vmp;);
}
/* Adjust page tables. */
if(src_vmp->vm_flags & VMF_HASPT)
pt_bind(&src_vmp->vm_pt, src_vmp);
if(dst_vmp->vm_flags & VMF_HASPT)
pt_bind(&dst_vmp->vm_pt, dst_vmp);
if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r);
}
#if SWAP_PROC_DEBUG
printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n",
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
printf("swap_proc: map_printmap for source after swapping:\n");
map_printmap(src_vmp);
printf("swap_proc: map_printmap for destination after swapping:\n");
map_printmap(dst_vmp);
/* For regular processes, transfer regions above the stack now.
* In case of rollback, we need to skip this step. To sandbox the
* new instance and prevent state corruption on rollback, we share all
* the regions between the two instances as COW.
*/
if(!is_vm && (dst_vmp->vm_flags & VMF_HASPT)) {
vr = map_lookup(dst_vmp, arch_vir2map(dst_vmp, dst_vmp->vm_stacktop));
if(vr && !map_lookup(src_vmp, arch_vir2map(src_vmp, src_vmp->vm_stacktop))) {
#if LU_DEBUG
printf("VM: swap_proc_dyn_data: tranferring regions above the stack from %d to %d\n",
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
r = map_proc_copy_from(src_vmp, dst_vmp, vr);
if(r != OK) {
return r;
}
}
}
return OK;
}

View file

@ -28,6 +28,7 @@
/* How noisy are we supposed to be? */
#define VERBOSE 0
#define LU_DEBUG 1
/* Minimum stack region size - 64MB. */
#define MINSTACKREGION (64*1024*1024)