SEF: query VM about holes during state transfer

The 'memory' service has holes in its data section, which causes
problems during state transfer.  Since VM cannot handle page faults
during a multicomponent-with-VM live update, the state transfer must
ensure that no page faults occur during copying.  Therefore, we now
query VM about the regions to copy, thus skipping holes.  While the
solution is not ideal, it is sufficiently generic that it can be used
for the data section state transfer of all processes, and possibly
for state transfer of other regions in the future as well.

Change-Id: I2a71383a18643ebd36956c396fbd22c8fd137202
This commit is contained in:
David van Moolenbroek 2015-07-18 13:04:34 +02:00
parent 683f1fcab3
commit 2867e60add
6 changed files with 109 additions and 22 deletions

View file

@ -114,8 +114,8 @@ static void sef_local_startup()
{
/* Register init callbacks. */
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_lu(sef_cb_init_fresh);
sef_setcb_init_restart(sef_cb_init_fresh);
sef_setcb_init_lu(SEF_CB_INIT_LU_DEFAULT);
sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
/* Let SEF perform startup. */
sef_startup();

View file

@ -12,7 +12,7 @@
#define ANY_TSK _ENDPOINT(3, _ENDPOINT_P(ANY))
/* IPC filter constants. */
#define IPCF_MAX_ELEMENTS NR_SYS_PROCS
#define IPCF_MAX_ELEMENTS (NR_SYS_PROCS * 2)
/* IPC filter flags. */
#define IPCF_MATCH_M_SOURCE 0x1

View file

@ -337,7 +337,7 @@ void sef_setcb_gcov(sef_cb_gcov_t cb);
int sef_copy_state_region_ctl(sef_init_info_t *info,
vir_bytes *src_address, vir_bytes *dst_address);
int sef_copy_state_region(sef_init_info_t *info,
vir_bytes address, size_t size, vir_bytes dst_address);
vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes);
int sef_st_state_transfer(sef_init_info_t *info);
/* Callback prototypes to be passed to the State Transfer framework. */

View file

@ -1,9 +1,12 @@
#include "syslib.h"
#include <assert.h>
#include <unistd.h>
#include <minix/sysutil.h>
#include <string.h>
#include <machine/vmparam.h>
#include <minix/sysutil.h>
#include "syslib.h"
/* SEF Init callbacks. */
static struct sef_init_cbs {
sef_cb_init_t sef_cb_init_fresh;
@ -356,11 +359,11 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info)
/* Transfer data. */
size = (size_t)(_brksize - data_start);
r = sef_copy_state_region(info, data_start, size, data_start);
if(r != OK) {
printf("sef_cb_init_identity_state_transfer: data transfer failed\n");
r = sef_copy_state_region(info, data_start, size, data_start,
TRUE /*may_have_holes*/);
if (r != OK)
return r;
}
new_brksize = _brksize;
@ -384,7 +387,7 @@ int sef_cb_init_identity_state_transfer(int type, sef_init_info_t *info)
assert(_brksize == new_brksize);
size = (size_t)(_brksize - old_brksize);
r = sef_copy_state_region(info, (vir_bytes) old_brksize, size,
(vir_bytes) old_brksize);
(vir_bytes) old_brksize, FALSE /*may_have_holes*/);
if(r != OK) {
printf("sef_cb_init_identity_state_transfer: extended heap transfer failed\n");
return r;

View file

@ -4,12 +4,15 @@
#include <machine/archtypes.h>
#include <minix/timers.h>
#include <minix/sysutil.h>
#include <minix/vm.h>
#include "kernel/config.h"
#include "kernel/const.h"
#include "kernel/type.h"
#include "kernel/proc.h"
EXTERN endpoint_t sef_self_endpoint;
/* SEF Live update prototypes for sef_receive(). */
void do_sef_st_before_receive(void);
@ -43,9 +46,14 @@ int sef_copy_state_region_ctl(sef_init_info_t *info, vir_bytes *src_address, vir
* sef_copy_state_region *
*===========================================================================*/
int sef_copy_state_region(sef_init_info_t *info,
vir_bytes address, size_t size, vir_bytes dst_address)
vir_bytes address, size_t size, vir_bytes dst_address, int may_have_holes)
{
vir_bytes base, top, target;
struct vm_region_info vri;
int r;
base = address;
if(sef_copy_state_region_ctl(info, &address, &dst_address)) {
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: memcpy %d bytes, addr = 0x%08x -> 0x%08x...\n",
@ -53,19 +61,80 @@ int sef_copy_state_region(sef_init_info_t *info,
#endif
/* memcpy region from current state */
memcpy((void*) dst_address, (void *)address, size);
} else {
} else if (may_have_holes && sef_self_endpoint != VM_PROC_NR &&
vm_info_region(info->old_endpoint, &vri, 1, &base) == 1) {
/* Perform a safe copy of a region of the old state. The section may
* contain holes, so ask VM for the actual regions within the data
* section and transfer each one separately. The alternative, just
* copying until a page fault happens, is not possible in the multi-
* component-with-VM live update case, where VM may not receive page
* faults during the live update window. For now, we use the region
* iteration approach for the data section only; other cases have not
* been tested, but may work as well.
*/
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> 0x%08x, gid = %d, source = %d...\n",
size, address, dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> "
"0x%08x, gid = %d, source = %d, with holes...\n", size, address,
dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
#endif
/* Perform a safe copy of a region of the old state. */
if((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID, address,
dst_address, size)) != OK) {
/* The following is somewhat of a hack: the start of the data section
* may in fact not be page-aligned and may be part of the last page of
* the preceding (text) section. Therefore, if the first region we
* find starts above the known base address, blindly copy the area in
* between.
*/
if (vri.vri_addr > address) {
if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID,
address, dst_address, vri.vri_addr - address)) != OK) {
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: sys_safecopyfrom failed\n");
#endif
return r;
}
}
top = address + size;
do {
assert(vri.vri_addr >= address);
if (vri.vri_addr >= top)
break;
if (vri.vri_length > top - vri.vri_addr)
vri.vri_length = top - vri.vri_addr;
target = dst_address + (vri.vri_addr - address);
if ((r = sys_safecopyfrom(info->old_endpoint,
SEF_STATE_TRANSFER_GID, vri.vri_addr, target,
vri.vri_length)) != OK) {
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: sys_safecopyfrom failed\n");
#endif
return r;
}
/* Save on a VM call if the next address is already too high. */
if (base >= top)
break;
} while (vm_info_region(info->old_endpoint, &vri, 1, &base) == 1);
} else {
/* Perform a safe copy of a region of the old state, without taking into
* account any holes. This is the default for anything but the data
* section, with a few additional exceptions: VM can't query VM, so
* simply assume there are no holes; also, if we fail to get one region
* for the old process (and this is presumably possible if its heap is
* so small it fits in the last text page, see above), we also just
* blindly copy over the entire data section.
*/
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: copying %d bytes, addr = 0x%08x -> "
"0x%08x, gid = %d, source = %d, without holes...\n", size, address,
dst_address, SEF_STATE_TRANSFER_GID, info->old_endpoint);
#endif
if ((r = sys_safecopyfrom(info->old_endpoint, SEF_STATE_TRANSFER_GID,
address, dst_address, size)) != OK) {
#if STATE_TRANS_DEBUG
printf("sef_copy_state_region: sys_safecopyfrom failed\n");
#endif
return r;
}
}
}
return OK;
@ -85,7 +154,7 @@ int sef_copy_state_region(sef_init_info_t *info,
}
if (sef_copy_state_region(info, old_priv.s_state_table
, sef_llvm_state_table_size(), (vir_bytes) addr))
, sef_llvm_state_table_size(), (vir_bytes) addr, FALSE /*may_have_holes*/))
{
printf("ERROR. state table transfer failed\n");
return EGENERIC;
@ -111,7 +180,8 @@ int sef_copy_state_region_opaque(void *info_opaque, uint32_t address,
{
assert(info_opaque != NULL && "Invalid info_opaque pointer.");
return sef_copy_state_region((sef_init_info_t *)(info_opaque),
(vir_bytes) address, size, (vir_bytes) dst_address);
(vir_bytes) address, size, (vir_bytes) dst_address,
FALSE /*may_have_holes*/);
}
/*===========================================================================*

View file

@ -601,7 +601,13 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info)
m.m_source = VM_PROC_NR;
for(i=0;i < NR_SYS_PROCS;i++) {
if(rprocpub[i].in_use && rprocpub[i].old_endpoint != NONE) {
if(num_elements <= IPCF_MAX_ELEMENTS-3) {
if(num_elements <= IPCF_MAX_ELEMENTS-5) {
/* VM_BRK is needed for normal operation during the live
* update. VM_INFO is needed for state transfer in the
* light of holes. Pagefaults and handle-memory requests
* are blocked intentionally, as handling these would
* prevent VM from being able to roll back.
*/
ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE;
ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint;
if(!(info->flags & SEF_LU_UNSAFE)) {
@ -616,6 +622,14 @@ static int sef_cb_init_vm_multi_lu(int type, sef_init_info_t *info)
ipc_filter[num_elements].m_type = VM_BRK;
}
num_elements++;
if(!(info->flags & SEF_LU_UNSAFE)) {
ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
ipc_filter[num_elements].m_source = rprocpub[i].old_endpoint;
ipc_filter[num_elements++].m_type = VM_INFO;
ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
ipc_filter[num_elements].m_source = rprocpub[i].new_endpoint;
ipc_filter[num_elements++].m_type = VM_INFO;
}
/* Make sure we can talk to any RS instance. */
if(rprocpub[i].old_endpoint == RS_PROC_NR) {
ipc_filter[num_elements].flags = IPCF_MATCH_M_SOURCE;