Crash recovery and live update support for VM.
This commit is contained in:
parent
0b364d00bc
commit
91a83fe265
7 changed files with 286 additions and 53 deletions
|
@ -1058,7 +1058,8 @@
|
|||
#define VM_RS_MEMCTL (VM_RQ_BASE+42)
|
||||
# define VM_RS_CTL_ENDPT m1_i1
|
||||
# define VM_RS_CTL_REQ m1_i2
|
||||
# define VM_RS_MEM_PIN 0 /* pin memory */
|
||||
# define VM_RS_MEM_PIN 0 /* pin memory */
|
||||
# define VM_RS_MEM_MAKE_VM 1 /* make VM instance */
|
||||
|
||||
/* Total. */
|
||||
#define NR_VM_CALLS 43
|
||||
|
|
|
@ -552,6 +552,19 @@ struct rproc *rp;
|
|||
*/
|
||||
setuid(0);
|
||||
|
||||
/* If this is a VM instance, let VM know now. */
|
||||
if(rp->r_priv.s_flags & VM_SYS_PROC) {
|
||||
if(rs_verbose)
|
||||
printf("RS: informing VM of instance %s\n", srv_to_string(rp));
|
||||
|
||||
s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM);
|
||||
if(s != OK) {
|
||||
printf("vm_memctl failed: %d\n", s);
|
||||
cleanup_service(rp);
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
/* Tell VM about allowed calls. */
|
||||
if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) {
|
||||
printf("RS: vm_set_priv failed: %d\n", s);
|
||||
|
|
|
@ -526,6 +526,119 @@ PRIVATE char *ptestr(u32_t pte)
|
|||
return str;
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* pt_map_in_range *
|
||||
*===========================================================================*/
|
||||
PUBLIC int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start, vir_bytes end)
{
/* Transfer all the mappings from the pt of the source process to the pt of
 * the destination process in the range specified.
 *
 * For every page in [start, end] whose page directory entry and page table
 * entry are both present in the source, the source page table entry is
 * copied verbatim into the same pde/pte slot of the destination. Pages that
 * are not present in the source leave the destination untouched.
 *
 * start and end must be page aligned; an end of 0 means VM_DATATOP.
 * The destination must already have a page table for every pde that is
 * written. Always returns OK.
 */
	int pde, pte;
	vir_bytes viraddr;
	pt_t *pt, *dst_pt;

	pt = &src_vmp->vm_pt;
	dst_pt = &dst_vmp->vm_pt;

	end = end ? end : VM_DATATOP;
	assert(start % I386_PAGE_SIZE == 0);
	assert(end % I386_PAGE_SIZE == 0);
	assert(I386_VM_PDE(start) >= proc_pde && start <= end);
	assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);

#if LU_DEBUG
	printf("VM: pt_map_in_range: src = %d, dst = %d\n",
		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
	printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
		start, I386_VM_PDE(start), I386_VM_PTE(start),
		end, I386_VM_PDE(end), I386_VM_PTE(end));
#endif

	/* Scan all page-table entries in the range. */
	for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
		pde = I386_VM_PDE(viraddr);
		if(pt->pt_dir[pde] & I386_VM_PRESENT) {
			pte = I386_VM_PTE(viraddr);
			if(pt->pt_pt[pde][pte] & I386_VM_PRESENT) {
				/* Transfer the mapping. The destination page
				 * table must exist; fail loudly rather than
				 * write through a missing table.
				 */
				assert(dst_pt->pt_pt[pde]);
				dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];
			}
		}

		/* Stop explicitly at VM_DATATOP instead of relying on the
		 * loop condition alone, so viraddr is never advanced beyond
		 * it (presumably to avoid wrap-around of viraddr).
		 */
		if(viraddr == VM_DATATOP) break;
	}

	return OK;
}
|
||||
|
||||
/*===========================================================================*
|
||||
* pt_ptmap *
|
||||
*===========================================================================*/
|
||||
PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
|
||||
{
|
||||
/* Transfer mappings to page dir and page tables from source process and
|
||||
* destination process. Make sure all the mappings are above the stack, not
|
||||
* to corrupt valid mappings in the data segment of the destination process.
|
||||
*/
|
||||
int pde, r;
|
||||
phys_bytes physaddr;
|
||||
vir_bytes viraddr;
|
||||
pt_t *pt;
|
||||
|
||||
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
|
||||
pt = &src_vmp->vm_pt;
|
||||
|
||||
#if LU_DEBUG
|
||||
printf("VM: pt_ptmap: src = %d, dst = %d\n",
|
||||
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
|
||||
#endif
|
||||
|
||||
/* Transfer mapping to the page directory. */
|
||||
assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop);
|
||||
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir);
|
||||
physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
|
||||
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
|
||||
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
|
||||
WMF_OVERWRITE)) != OK) {
|
||||
return r;
|
||||
}
|
||||
#if LU_DEBUG
|
||||
printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
|
||||
viraddr, physaddr);
|
||||
#endif
|
||||
|
||||
/* Scan all non-reserved page-directory entries. */
|
||||
for(pde=proc_pde; pde < I386_VM_DIR_ENTRIES; pde++) {
|
||||
if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Transfer mapping to the page table. */
|
||||
assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop);
|
||||
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]);
|
||||
physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
|
||||
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
|
||||
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
|
||||
WMF_OVERWRITE)) != OK) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
#if LU_DEBUG
|
||||
printf("VM: pt_ptmap: transferred mappings to page tables, pde range %d - %d\n",
|
||||
proc_pde, I386_VM_DIR_ENTRIES - 1);
|
||||
#endif
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* pt_writemap *
|
||||
*===========================================================================*/
|
||||
|
@ -920,7 +1033,12 @@ PUBLIC void pt_init(phys_bytes usedlimit)
|
|||
|
||||
/* Back to reality - this is where the stack actually is. */
|
||||
vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;
|
||||
|
||||
|
||||
/* Pretend VM stack top is the same as any regular process, not to
|
||||
* have discrepancies with new VM instances later on.
|
||||
*/
|
||||
vmprocess->vm_stacktop = VM_STACKTOP;
|
||||
|
||||
/* All OK. */
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -44,7 +44,8 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks,
|
|||
_PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) );
|
||||
_PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) );
|
||||
_PROTOTYPE( int do_info, (message *) );
|
||||
_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) );
|
||||
_PROTOTYPE( int swap_proc_slot, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
|
||||
_PROTOTYPE( int swap_proc_dyn_data, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
|
||||
|
||||
/* exit.c */
|
||||
_PROTOTYPE( void clear_proc, (struct vmproc *vmp) );
|
||||
|
@ -101,6 +102,9 @@ _PROTOTYPE( void pt_init_mem, (void) );
|
|||
_PROTOTYPE( void pt_check, (struct vmproc *vmp) );
|
||||
_PROTOTYPE( int pt_new, (pt_t *pt) );
|
||||
_PROTOTYPE( void pt_free, (pt_t *pt) );
|
||||
_PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp,
|
||||
vir_bytes start, vir_bytes end) );
|
||||
_PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) );
|
||||
_PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end,
|
||||
u32_t flags, int verify));
|
||||
_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <env.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include "glo.h"
|
||||
#include "proto.h"
|
||||
|
@ -61,11 +62,26 @@ PUBLIC int do_rs_set_priv(message *m)
|
|||
PUBLIC int do_rs_update(message *m_ptr)
|
||||
{
|
||||
endpoint_t src_e, dst_e, reply_e;
|
||||
int src_p, dst_p;
|
||||
struct vmproc *src_vmp, *dst_vmp;
|
||||
struct vir_region *vr;
|
||||
int r;
|
||||
|
||||
src_e = m_ptr->VM_RS_SRC_ENDPT;
|
||||
dst_e = m_ptr->VM_RS_DST_ENDPT;
|
||||
|
||||
/* Lookup slots for source and destination process. */
|
||||
if(vm_isokendpt(src_e, &src_p) != OK) {
|
||||
printf("do_rs_update: bad src endpoint %d\n", src_e);
|
||||
return EINVAL;
|
||||
}
|
||||
src_vmp = &vmproc[src_p];
|
||||
if(vm_isokendpt(dst_e, &dst_p) != OK) {
|
||||
printf("do_rs_update: bad dst endpoint %d\n", dst_e);
|
||||
return EINVAL;
|
||||
}
|
||||
dst_vmp = &vmproc[dst_p];
|
||||
|
||||
/* Let the kernel do the update first. */
|
||||
r = sys_update(src_e, dst_e);
|
||||
if(r != OK) {
|
||||
|
@ -73,15 +89,21 @@ PUBLIC int do_rs_update(message *m_ptr)
|
|||
}
|
||||
|
||||
/* Do the update in VM now. */
|
||||
r = swap_proc(src_e, dst_e);
|
||||
r = swap_proc_slot(src_vmp, dst_vmp);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
r = swap_proc_dyn_data(src_vmp, dst_vmp);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
pt_bind(&src_vmp->vm_pt, src_vmp);
|
||||
pt_bind(&dst_vmp->vm_pt, dst_vmp);
|
||||
|
||||
/* Reply, update-aware. */
|
||||
reply_e = m_ptr->m_source;
|
||||
if(reply_e == src_e) reply_e = dst_e;
|
||||
if(reply_e == dst_e) reply_e = src_e;
|
||||
else if(reply_e == dst_e) reply_e = src_e;
|
||||
m_ptr->m_type = OK;
|
||||
r = send(reply_e, m_ptr);
|
||||
if(r != OK) {
|
||||
|
@ -91,6 +113,55 @@ PUBLIC int do_rs_update(message *m_ptr)
|
|||
return SUSPEND;
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* rs_memctl_make_vm_instance *
|
||||
*===========================================================================*/
|
||||
PRIVATE int rs_memctl_make_vm_instance(struct vmproc *new_vm_vmp)
|
||||
{
|
||||
int vm_p, r;
|
||||
u32_t flags;
|
||||
int verify;
|
||||
struct vmproc *this_vm_vmp;
|
||||
|
||||
this_vm_vmp = &vmproc[VM_PROC_NR];
|
||||
|
||||
/* Copy settings from current VM. */
|
||||
new_vm_vmp->vm_stacktop = this_vm_vmp->vm_stacktop;
|
||||
new_vm_vmp->vm_arch.vm_data_top = this_vm_vmp->vm_arch.vm_data_top;
|
||||
|
||||
/* Pin memory for the new VM instance. */
|
||||
r = map_pin_memory(new_vm_vmp);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Preallocate page tables for the entire address space for both
|
||||
* VM and the new VM instance.
|
||||
*/
|
||||
flags = 0;
|
||||
verify = FALSE;
|
||||
r = pt_ptalloc_in_range(&this_vm_vmp->vm_pt, 0, 0, flags, verify);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
r = pt_ptalloc_in_range(&new_vm_vmp->vm_pt, 0, 0, flags, verify);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Let the new VM instance map VM's page tables and its own. */
|
||||
r = pt_ptmap(this_vm_vmp, new_vm_vmp);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
r = pt_ptmap(new_vm_vmp, new_vm_vmp);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* do_rs_memctl *
|
||||
*===========================================================================*/
|
||||
|
@ -116,7 +187,9 @@ PUBLIC int do_rs_memctl(message *m_ptr)
|
|||
case VM_RS_MEM_PIN:
|
||||
r = map_pin_memory(vmp);
|
||||
return r;
|
||||
|
||||
case VM_RS_MEM_MAKE_VM:
|
||||
r = rs_memctl_make_vm_instance(vmp);
|
||||
return r;
|
||||
default:
|
||||
printf("do_rs_memctl: bad request %d\n", req);
|
||||
return EINVAL;
|
||||
|
|
|
@ -37,8 +37,6 @@
|
|||
#include "kernel/type.h"
|
||||
#include "kernel/proc.h"
|
||||
|
||||
#define SWAP_PROC_DEBUG 0
|
||||
|
||||
/*===========================================================================*
|
||||
* get_mem_map *
|
||||
*===========================================================================*/
|
||||
|
@ -232,36 +230,16 @@ PUBLIC int do_info(message *m)
|
|||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* swap_proc *
|
||||
* swap_proc_slot *
|
||||
*===========================================================================*/
|
||||
PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
|
||||
PUBLIC int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
|
||||
{
|
||||
struct vmproc *src_vmp, *dst_vmp;
|
||||
struct vmproc orig_src_vmproc, orig_dst_vmproc;
|
||||
int src_p, dst_p, r;
|
||||
struct vir_region *vr;
|
||||
|
||||
/* Lookup slots for source and destination process. */
|
||||
if(vm_isokendpt(src_e, &src_p) != OK) {
|
||||
printf("swap_proc: bad src endpoint %d\n", src_e);
|
||||
return EINVAL;
|
||||
}
|
||||
src_vmp = &vmproc[src_p];
|
||||
if(vm_isokendpt(dst_e, &dst_p) != OK) {
|
||||
printf("swap_proc: bad dst endpoint %d\n", dst_e);
|
||||
return EINVAL;
|
||||
}
|
||||
dst_vmp = &vmproc[dst_p];
|
||||
|
||||
#if SWAP_PROC_DEBUG
|
||||
printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n",
|
||||
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
|
||||
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
|
||||
|
||||
printf("swap_proc: map_printmap for source before swapping:\n");
|
||||
map_printmap(src_vmp);
|
||||
printf("swap_proc: map_printmap for destination before swapping:\n");
|
||||
map_printmap(dst_vmp);
|
||||
#if LU_DEBUG
|
||||
printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
|
||||
src_vmp->vm_endpoint, src_vmp->vm_slot,
|
||||
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
|
||||
#endif
|
||||
|
||||
/* Save existing data. */
|
||||
|
@ -278,7 +256,52 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
|
|||
dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
|
||||
dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
|
||||
|
||||
/* Preserve vir_region's parents. */
|
||||
/* Preserve yielded blocks. */
|
||||
src_vmp->vm_yielded_blocks = orig_src_vmproc.vm_yielded_blocks;
|
||||
dst_vmp->vm_yielded_blocks = orig_dst_vmproc.vm_yielded_blocks;
|
||||
|
||||
#if LU_DEBUG
|
||||
printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
|
||||
src_vmp->vm_endpoint, src_vmp->vm_slot,
|
||||
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
|
||||
#endif
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
* swap_proc_dyn_data *
|
||||
*===========================================================================*/
|
||||
PUBLIC int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
|
||||
{
|
||||
struct vir_region *vr;
|
||||
int is_vm;
|
||||
int r;
|
||||
|
||||
is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
|
||||
|
||||
/* For VM, transfer memory regions above the stack first. */
|
||||
if(is_vm) {
|
||||
#if LU_DEBUG
|
||||
printf("VM: swap_proc_dyn_data: tranferring regions above the stack from old VM (%d) to new VM (%d)\n",
|
||||
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
|
||||
#endif
|
||||
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
|
||||
r = pt_map_in_range(src_vmp, dst_vmp,
|
||||
arch_vir2map(src_vmp, src_vmp->vm_stacktop), 0);
|
||||
if(r != OK) {
|
||||
printf("swap_proc_dyn_data: pt_map_in_range failed\n");
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
#if LU_DEBUG
|
||||
printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
|
||||
src_vmp->vm_endpoint, src_vmp->vm_slot,
|
||||
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
|
||||
#endif
|
||||
|
||||
/* Swap vir_regions' parents. */
|
||||
for(vr = src_vmp->vm_regions; vr; vr = vr->next) {
|
||||
USE(vr, vr->parent = src_vmp;);
|
||||
}
|
||||
|
@ -286,25 +309,25 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
|
|||
USE(vr, vr->parent = dst_vmp;);
|
||||
}
|
||||
|
||||
/* Adjust page tables. */
|
||||
if(src_vmp->vm_flags & VMF_HASPT)
|
||||
pt_bind(&src_vmp->vm_pt, src_vmp);
|
||||
if(dst_vmp->vm_flags & VMF_HASPT)
|
||||
pt_bind(&dst_vmp->vm_pt, dst_vmp);
|
||||
if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
|
||||
panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r);
|
||||
}
|
||||
|
||||
#if SWAP_PROC_DEBUG
|
||||
printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n",
|
||||
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
|
||||
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
|
||||
|
||||
printf("swap_proc: map_printmap for source after swapping:\n");
|
||||
map_printmap(src_vmp);
|
||||
printf("swap_proc: map_printmap for destination after swapping:\n");
|
||||
map_printmap(dst_vmp);
|
||||
/* For regular processes, transfer regions above the stack now.
|
||||
* In case of rollback, we need to skip this step. To sandbox the
|
||||
* new instance and prevent state corruption on rollback, we share all
|
||||
* the regions between the two instances as COW.
|
||||
*/
|
||||
if(!is_vm && (dst_vmp->vm_flags & VMF_HASPT)) {
|
||||
vr = map_lookup(dst_vmp, arch_vir2map(dst_vmp, dst_vmp->vm_stacktop));
|
||||
if(vr && !map_lookup(src_vmp, arch_vir2map(src_vmp, src_vmp->vm_stacktop))) {
|
||||
#if LU_DEBUG
|
||||
printf("VM: swap_proc_dyn_data: tranferring regions above the stack from %d to %d\n",
|
||||
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
|
||||
#endif
|
||||
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
|
||||
r = map_proc_copy_from(src_vmp, dst_vmp, vr);
|
||||
if(r != OK) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
|
||||
/* How noisy are we supposed to be? */
|
||||
#define VERBOSE 0
|
||||
#define LU_DEBUG 1
|
||||
|
||||
/* Minimum stack region size - 64MB. */
|
||||
#define MINSTACKREGION (64*1024*1024)
|
||||
|
|
Loading…
Reference in a new issue