RS live update support.

This commit is contained in:
Cristiano Giuffrida 2010-07-09 18:29:04 +00:00
parent 895850b8cf
commit 8427d774b6
15 changed files with 348 additions and 123 deletions

View file

@ -531,6 +531,7 @@
#define SYS_PRIV_ADD_IRQ 7 /* Add IRQ */
#define SYS_PRIV_QUERY_MEM 8 /* Verify memory privilege. */
#define SYS_PRIV_UPDATE_SYS 9 /* Update a sys privilege structure. */
#define SYS_PRIV_YIELD 10 /* Allow process to run and suspend */
/* Field names for SYS_SETGRANT */
#define SG_ADDR m2_p1 /* address */

View file

@ -9,5 +9,14 @@
} \
}
/* Mark a code path that must never be executed: panic with the file name and
 * line number of the offending location, then loop forever in case panic()
 * ever returns.
 */
#define NOT_REACHABLE do { \
panic("NOT_REACHABLE at %s:%d", __FILE__, __LINE__); \
for(;;); \
} while(0)
/* Mark functionality that has not been written yet: panic with the file name
 * and line number of the location that was reached.
 */
#define NOT_IMPLEMENTED do { \
panic("NOT_IMPLEMENTED at %s:%d", __FILE__, __LINE__); \
} while(0)
#endif /* _MINIX_DEBUG_H */

View file

@ -35,26 +35,32 @@ typedef struct {
/* Callback type definitions. */
typedef int(*sef_cb_init_t)(int type, sef_init_info_t *info);
typedef int(*sef_cb_init_response_t)(message *m_ptr);
/* Callback registration helpers. */
_PROTOTYPE( void sef_setcb_init_fresh, (sef_cb_init_t cb));
_PROTOTYPE( void sef_setcb_init_lu, (sef_cb_init_t cb));
_PROTOTYPE( void sef_setcb_init_restart, (sef_cb_init_t cb));
_PROTOTYPE( void sef_setcb_init_response, (sef_cb_init_response_t cb) );
/* Predefined callback implementations. */
_PROTOTYPE( int sef_cb_init_null, (int type, sef_init_info_t *info) );
_PROTOTYPE( int sef_cb_init_response_null, (message *m_ptr) );
_PROTOTYPE( int sef_cb_init_fail, (int type, sef_init_info_t *info) );
_PROTOTYPE( int sef_cb_init_crash, (int type, sef_init_info_t *info) );
_PROTOTYPE( int sef_cb_init_response_rs_reply, (message *m_ptr) );
/* Macros for predefined callback implementations. */
#define SEF_CB_INIT_FRESH_NULL sef_cb_init_null
#define SEF_CB_INIT_LU_NULL sef_cb_init_null
#define SEF_CB_INIT_RESTART_NULL sef_cb_init_null
#define SEF_CB_INIT_RESPONSE_NULL sef_cb_init_response_null
#define SEF_CB_INIT_FRESH_DEFAULT sef_cb_init_null
#define SEF_CB_INIT_LU_DEFAULT sef_cb_init_null
#define SEF_CB_INIT_RESTART_DEFAULT sef_cb_init_null
#define SEF_CB_INIT_RESPONSE_DEFAULT sef_cb_init_response_rs_reply
/* Init types. */
#define SEF_INIT_FRESH 0 /* init fresh */
@ -121,6 +127,7 @@ typedef int(*sef_cb_lu_state_isvalid_t)(int);
typedef void(*sef_cb_lu_state_changed_t)(int, int);
typedef void(*sef_cb_lu_state_dump_t)(int);
typedef int(*sef_cb_lu_state_save_t)(int);
typedef int(*sef_cb_lu_response_t)(message *m_ptr);
/* Callback registration helpers. */
_PROTOTYPE( void sef_setcb_lu_prepare, (sef_cb_lu_prepare_t cb) );
@ -128,6 +135,7 @@ _PROTOTYPE( void sef_setcb_lu_state_isvalid, (sef_cb_lu_state_isvalid_t cb) );
_PROTOTYPE( void sef_setcb_lu_state_changed, (sef_cb_lu_state_changed_t cb) );
_PROTOTYPE( void sef_setcb_lu_state_dump, (sef_cb_lu_state_dump_t cb) );
_PROTOTYPE( void sef_setcb_lu_state_save, (sef_cb_lu_state_save_t cb) );
_PROTOTYPE( void sef_setcb_lu_response, (sef_cb_lu_response_t cb) );
/* Predefined callback implementations. */
_PROTOTYPE( int sef_cb_lu_prepare_null, (int state) );
@ -135,12 +143,14 @@ _PROTOTYPE( int sef_cb_lu_state_isvalid_null, (int state) );
_PROTOTYPE( void sef_cb_lu_state_changed_null, (int old_state, int state) );
_PROTOTYPE( void sef_cb_lu_state_dump_null, (int state) );
_PROTOTYPE( int sef_cb_lu_state_save_null, (int state) );
_PROTOTYPE( int sef_cb_lu_response_null, (message *m_ptr) );
_PROTOTYPE( int sef_cb_lu_prepare_always_ready, (int state) );
_PROTOTYPE( int sef_cb_lu_prepare_never_ready, (int state) );
_PROTOTYPE( int sef_cb_lu_prepare_crash, (int state) );
_PROTOTYPE( int sef_cb_lu_state_isvalid_standard, (int state) );
_PROTOTYPE( int sef_cb_lu_state_isvalid_workfree, (int state) );
_PROTOTYPE( int sef_cb_lu_response_rs_reply, (message *m_ptr) );
/* Macros for predefined callback implementations. */
#define SEF_CB_LU_PREPARE_NULL sef_cb_lu_prepare_null
@ -148,12 +158,14 @@ _PROTOTYPE( int sef_cb_lu_state_isvalid_workfree, (int state) );
#define SEF_CB_LU_STATE_CHANGED_NULL sef_cb_lu_state_changed_null
#define SEF_CB_LU_STATE_DUMP_NULL sef_cb_lu_state_dump_null
#define SEF_CB_LU_STATE_SAVE_NULL sef_cb_lu_state_save_null
#define SEF_CB_LU_RESPONSE_NULL sef_cb_lu_response_null
#define SEF_CB_LU_PREPARE_DEFAULT sef_cb_lu_prepare_null
#define SEF_CB_LU_STATE_ISVALID_DEFAULT sef_cb_lu_state_isvalid_null
#define SEF_CB_LU_STATE_CHANGED_DEFAULT sef_cb_lu_state_changed_null
#define SEF_CB_LU_STATE_DUMP_DEFAULT sef_cb_lu_state_dump_null
#define SEF_CB_LU_STATE_SAVE_DEFAULT sef_cb_lu_state_save_null
#define SEF_CB_LU_RESPONSE_DEFAULT sef_cb_lu_response_rs_reply
/* Standard live update states. */
#define SEF_LU_STATE_NULL 0 /* null state */

View file

@ -55,15 +55,6 @@
#define TRACE(code, statement)
#endif
#define NOT_REACHABLE do { \
panic("NOT_REACHABLE at %s:%d", __FILE__, __LINE__); \
for(;;); \
} while(0)
#define NOT_IMPLEMENTED do { \
panic("NOT_IMPLEMENTED at %s:%d", __FILE__, __LINE__); \
} while(0)
#ifdef CONFIG_BOOT_VERBOSE
#define BOOT_VERBOSE(x) x
#else

View file

@ -59,6 +59,15 @@ PUBLIC int do_privctl(struct proc * caller, message * m_ptr)
RTS_UNSET(rp, RTS_NO_PRIV);
return(OK);
case SYS_PRIV_YIELD:
/* Allow process to run and suspend the caller. */
if (!RTS_ISSET(rp, RTS_NO_PRIV) || priv(rp)->s_proc_nr == NONE) {
return(EPERM);
}
RTS_SET(caller, RTS_NO_PRIV);
RTS_UNSET(rp, RTS_NO_PRIV);
return(OK);
case SYS_PRIV_DISALLOW:
/* Disallow process from running. */
if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM);

View file

@ -27,7 +27,7 @@ FORWARD _PROTOTYPE(int safecopy, (struct proc *, endpoint_t, endpoint_t,
cp_grant_id_t, int, int, size_t, vir_bytes, vir_bytes, int));
#define HASGRANTTABLE(gr) \
(!RTS_ISSET(gr, RTS_NO_PRIV) && priv(gr) && priv(gr)->s_grant_table > 0)
(priv(gr) && priv(gr)->s_grant_table)
/*===========================================================================*
* verify_grant *
@ -67,7 +67,12 @@ endpoint_t *e_granter; /* new granter (magic grants) */
* priv. structure, or the grant table in the priv. structure
* is too small for the grant, return EPERM.
*/
if(!HASGRANTTABLE(granter_proc)) return EPERM;
if(!HASGRANTTABLE(granter_proc)) {
printf(
"grant verify failed: granter %d has no grant table\n",
granter);
return(EPERM);
}
if(priv(granter_proc)->s_grant_entries <= grant) {
printf(
@ -244,7 +249,11 @@ int access; /* CPF_READ for a copy from granter to grantee, CPF_WRITE
/* See if there is a reasonable grant table. */
if(!(granter_p = endpoint_lookup(granter))) return EINVAL;
if(!HASGRANTTABLE(granter_p)) return EPERM;
if(!HASGRANTTABLE(granter_p)) {
printf(
"safecopy failed: granter %d has no grant table\n", granter);
return(EPERM);
}
/* Decide who is src and who is dst. */
if(access & CPF_READ) {

View file

@ -8,6 +8,7 @@
PUBLIC char sef_self_name[SEF_SELF_NAME_MAXLEN];
PUBLIC endpoint_t sef_self_endpoint;
PUBLIC int sef_self_priv_flags;
PUBLIC int sef_self_first_receive_done;
/* Debug. */
#define SEF_DEBUG_HEADER_MAXLEN 32
@ -41,14 +42,16 @@ PUBLIC void sef_startup()
/* SEF startup interface for system services. */
int r, status;
endpoint_t old_endpoint;
int priv_flags;
/* Get information about self. */
r = sys_whoami(&sef_self_endpoint, sef_self_name, SEF_SELF_NAME_MAXLEN,
&sef_self_priv_flags);
&priv_flags);
if ( r != OK) {
sef_self_endpoint = SELF;
sprintf(sef_self_name, "%s", "Unknown");
}
sef_self_priv_flags = priv_flags;
old_endpoint = NONE;
/* RS may wake up with the wrong endpoint, perform the update in that case. */
@ -92,6 +95,10 @@ PUBLIC void sef_startup()
}
}
#endif
/* (Re)initialize SEF variables. */
sef_self_first_receive_done = FALSE;
sef_self_priv_flags = priv_flags;
}
/*===========================================================================*
@ -112,6 +119,7 @@ PUBLIC int sef_receive_status(endpoint_t src, message *m_ptr, int *status_ptr)
/* Receive and return in case of error. */
r = receive(src, m_ptr, &status);
if(status_ptr) *status_ptr = status;
if(!sef_self_first_receive_done) sef_self_first_receive_done = TRUE;
if(r != OK) {
return r;
}

View file

@ -8,10 +8,12 @@ PRIVATE struct sef_cbs {
sef_cb_init_t sef_cb_init_fresh;
sef_cb_init_t sef_cb_init_lu;
sef_cb_init_t sef_cb_init_restart;
sef_cb_init_response_t sef_cb_init_response;
} sef_cbs = {
SEF_CB_INIT_FRESH_DEFAULT,
SEF_CB_INIT_LU_DEFAULT,
SEF_CB_INIT_RESTART_DEFAULT
SEF_CB_INIT_RESTART_DEFAULT,
SEF_CB_INIT_RESPONSE_DEFAULT
};
/* SEF Init prototypes for sef_startup(). */
@ -31,7 +33,8 @@ EXTERN endpoint_t sef_self_priv_flags;
PRIVATE int process_init(int type, sef_init_info_t *info)
{
/* Process initialization. */
int r;
int r, result;
message m;
/* Debug. */
#if SEF_INIT_DEBUG
@ -44,21 +47,26 @@ PRIVATE int process_init(int type, sef_init_info_t *info)
/* Let the callback code handle the specific initialization type. */
switch(type) {
case SEF_INIT_FRESH:
r = sef_cbs.sef_cb_init_fresh(type, info);
result = sef_cbs.sef_cb_init_fresh(type, info);
break;
case SEF_INIT_LU:
r = sef_cbs.sef_cb_init_lu(type, info);
result = sef_cbs.sef_cb_init_lu(type, info);
break;
case SEF_INIT_RESTART:
r = sef_cbs.sef_cb_init_restart(type, info);
result = sef_cbs.sef_cb_init_restart(type, info);
break;
default:
/* Not a valid SEF init type. */
r = EINVAL;
result = EINVAL;
break;
}
m.m_source = sef_self_endpoint;
m.m_type = RS_INIT;
m.RS_INIT_RESULT = result;
r = sef_cbs.sef_cb_init_response(&m);
return r;
}
@ -109,10 +117,6 @@ PUBLIC int do_sef_init_request(message *m_ptr)
/* Perform initialization. */
r = process_init(type, &info);
/* Report back to RS. */
m_ptr->RS_INIT_RESULT = r;
r = sendrec(RS_PROC_NR, m_ptr);
return r;
}
@ -143,6 +147,15 @@ PUBLIC void sef_setcb_init_restart(sef_cb_init_t cb)
sef_cbs.sef_cb_init_restart = cb;
}
/*===========================================================================*
* sef_setcb_init_response *
*===========================================================================*/
PUBLIC void sef_setcb_init_response(sef_cb_init_response_t cb)
{
/* Register cb as the init response callback. process_init() calls it with the
 * RS_INIT message that carries the initialization result. A NULL callback is
 * rejected by the assertion below.
 */
assert(cb != NULL);
sef_cbs.sef_cb_init_response = cb;
}
/*===========================================================================*
* sef_cb_init_null *
*===========================================================================*/
@ -152,6 +165,14 @@ PUBLIC int sef_cb_init_null(int UNUSED(type),
return OK;
}
/*===========================================================================*
* sef_cb_init_response_null *
*===========================================================================*/
PUBLIC int sef_cb_init_response_null(message * UNUSED(m_ptr))
{
/* Null init response callback: the message is ignored and ENOSYS is
 * returned to the caller.
 */
return ENOSYS;
}
/*===========================================================================*
* sef_cb_init_fail *
*===========================================================================*/
@ -170,3 +191,16 @@ PUBLIC int sef_cb_init_crash(int UNUSED(type), sef_init_info_t *UNUSED(info))
return OK;
}
/*===========================================================================*
* sef_cb_init_response_rs_reply *
*===========================================================================*/
PUBLIC int sef_cb_init_response_rs_reply(message *m_ptr)
{
/* Init response callback that reports to RS: pass the init result message
 * to RS with sendrec() and hand the sendrec() status back unchanged.
 */
  return sendrec(RS_PROC_NR, m_ptr);
}

View file

@ -3,7 +3,7 @@
#include <minix/sysutil.h>
/* SEF Live update variables. */
PRIVATE int sef_lu_state = SEF_LU_STATE_NULL;
PRIVATE int sef_lu_state;
/* SEF Live update callbacks. */
PRIVATE struct sef_cbs {
@ -12,12 +12,14 @@ PRIVATE struct sef_cbs {
sef_cb_lu_state_changed_t sef_cb_lu_state_changed;
sef_cb_lu_state_dump_t sef_cb_lu_state_dump;
sef_cb_lu_state_save_t sef_cb_lu_state_save;
sef_cb_lu_response_t sef_cb_lu_response;
} sef_cbs = {
SEF_CB_LU_PREPARE_DEFAULT,
SEF_CB_LU_STATE_ISVALID_DEFAULT,
SEF_CB_LU_STATE_CHANGED_DEFAULT,
SEF_CB_LU_STATE_DUMP_DEFAULT,
SEF_CB_LU_STATE_SAVE_DEFAULT,
SEF_CB_LU_RESPONSE_DEFAULT
};
/* SEF Live update prototypes for sef_receive(). */
@ -31,6 +33,10 @@ PRIVATE _PROTOTYPE( void sef_lu_ready, (int result) );
EXTERN _PROTOTYPE( char* sef_debug_header, (void) );
PRIVATE int sef_lu_debug_cycle = 0;
/* Information about SELF. */
EXTERN endpoint_t sef_self_endpoint;
EXTERN int sef_self_first_receive_done;
/*===========================================================================*
* do_sef_lu_before_receive *
*===========================================================================*/
@ -39,6 +45,11 @@ PUBLIC void do_sef_lu_before_receive()
/* Handle SEF Live update before receive events. */
int r;
/* Initialize on first receive. */
if(!sef_self_first_receive_done) {
sef_lu_state = SEF_LU_STATE_NULL;
}
/* Nothing to do if we are not preparing for a live update. */
if(sef_lu_state == SEF_LU_STATE_NULL) {
return;
@ -113,7 +124,7 @@ PUBLIC int do_sef_lu_request(message *m_ptr)
PRIVATE void sef_lu_ready(int result)
{
message m;
int old_state, rs_result, r;
int old_state, r;
#if SEF_LU_DEBUG
sef_lu_debug_begin();
@ -134,22 +145,22 @@ PRIVATE void sef_lu_ready(int result)
}
}
/* Inform RS that we're ready with the given result. */
/* Let the callback code produce a live update response and block.
* We should get beyond this point only if either result is an error or
* something else goes wrong in the callback code.
*/
m.m_source = sef_self_endpoint;
m.m_type = RS_LU_PREPARE;
m.RS_LU_STATE = sef_lu_state;
m.RS_LU_RESULT = result;
r = sendrec(RS_PROC_NR, &m);
if ( r != OK) {
panic("sendrec failed: %d", r);
}
r = sef_cbs.sef_cb_lu_response(&m);
#if SEF_LU_DEBUG
rs_result = m.m_type == RS_LU_PREPARE ? EINTR : m.m_type;
sef_lu_debug_begin();
sef_lu_dprint("%s, cycle=%d. The %s aborted the update with result %d!\n",
sef_debug_header(), sef_lu_debug_cycle,
(result == OK ? "server" : "client"),
(result == OK ? rs_result : result)); /* EINTR if update was canceled. */
(result == OK ? r : result)); /* EINTR if update was canceled. */
sef_lu_debug_end();
#endif
@ -208,6 +219,15 @@ PUBLIC void sef_setcb_lu_state_save(sef_cb_lu_state_save_t cb)
sef_cbs.sef_cb_lu_state_save = cb;
}
/*===========================================================================*
* sef_setcb_lu_response *
*===========================================================================*/
PUBLIC void sef_setcb_lu_response(sef_cb_lu_response_t cb)
{
/* Register cb as the live update response callback. sef_lu_ready() calls it
 * with the RS_LU_PREPARE message that carries the current state and the
 * prepare result. A NULL callback is rejected by the assertion below.
 */
assert(cb != NULL);
sef_cbs.sef_cb_lu_response = cb;
}
/*===========================================================================*
* sef_cb_lu_prepare_null *
*===========================================================================*/
@ -248,6 +268,14 @@ PUBLIC int sef_cb_lu_state_save_null(int UNUSED(result))
return OK;
}
/*===========================================================================*
* sef_cb_lu_response_null *
*===========================================================================*/
PUBLIC int sef_cb_lu_response_null(message * UNUSED(m_ptr))
{
/* Null live update response callback: the message is ignored and ENOSYS is
 * returned to the caller.
 */
return ENOSYS;
}
/*===========================================================================*
* sef_cb_lu_prepare_always_ready *
*===========================================================================*/
@ -297,3 +325,19 @@ PUBLIC int sef_cb_lu_state_isvalid_workfree(int state)
return (state == SEF_LU_STATE_WORK_FREE);
}
/*===========================================================================*
* sef_cb_lu_response_rs_reply *
*===========================================================================*/
PUBLIC int sef_cb_lu_response_rs_reply(message *m_ptr)
{
/* Live update response callback that reports to RS. Returns the sendrec()
 * status on failure; otherwise the type of the reply left in m_ptr, with a
 * reply still typed RS_LU_PREPARE reported as EINTR.
 */
  int status;

  /* Inform RS that we're ready with the given result. */
  status = sendrec(RS_PROC_NR, m_ptr);
  if (status != OK) {
      return status;
  }

  /* The message type was RS_LU_PREPARE when it was sent; finding it unchanged
   * in the reply is mapped to EINTR.
   */
  if (m_ptr->m_type == RS_LU_PREPARE) {
      return EINTR;
  }

  return m_ptr->m_type;
}

View file

@ -97,5 +97,9 @@
#define RS_DONTREPLY 0
#define RS_REPLY 1
/* Swap flags. */
#define RS_DONTSWAP 0
#define RS_SWAP 1
#endif /* RS_CONST_H */

View file

@ -126,7 +126,7 @@ PUBLIC int main(void)
/* Finally send reply message, unless disabled. */
if (result != EDONTREPLY) {
m.m_type = result;
reply(who_e, &m);
reply(who_e, NULL, &m);
}
}
}
@ -138,9 +138,13 @@ PUBLIC int main(void)
PRIVATE void sef_local_startup()
{
/* Register init callbacks. */
sef_setcb_init_response(do_init_ready);
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_restart(sef_cb_init_fail);
/* Register live update callbacks. */
sef_setcb_lu_response(do_upd_ready);
/* Register signal callbacks. */
sef_setcb_signal_handler(sef_cb_signal_handler);
sef_setcb_signal_manager(sef_cb_signal_manager);
@ -349,11 +353,6 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
continue;
}
/* Ignore RS. */
if(boot_image_priv->endpoint == RS_PROC_NR) {
continue;
}
/* Kernel-scheduled processes first */
if ((boot_image_priv->sched == KERNEL) ? usersched : !usersched) {
continue;
@ -363,14 +362,21 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
rp = &rproc[boot_image_priv - boot_image_priv_table];
rpub = rp->r_pub;
/* Allow the service to run. */
if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
panic("unable to initialize privileges: %d", s);
/* RS is already running as we speak. */
if(boot_image_priv->endpoint == RS_PROC_NR) {
if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) {
panic("unable to initialize RS: %d", s);
}
continue;
}
/* Allow the service to run. */
if ((s = sched_init_proc(rp)) != OK) {
panic("unable to initialize scheduling: %d", s);
}
if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
panic("unable to initialize privileges: %d", s);
}
/* Initialize service. We assume every service will always get
* back to us here at boot time.
@ -456,14 +462,8 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
if(pid == 0) {
/* New RS instance running. */
/* Synchronize with the old instance. */
s = sef_receive(RS_PROC_NR, &m);
if(s != OK) {
panic("sef_receive failed: %d", s);
}
/* Live update the old instance into the new one. */
s = update_service(&rp, &replica_rp);
s = update_service(&rp, &replica_rp, RS_SWAP);
if(s != OK) {
panic("unable to live update RS: %d", s);
}
@ -485,26 +485,18 @@ PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
/* Old RS instance running. */
/* Set up privileges for the new instance and let it run. */
set_sys_bit(replica_rp->r_priv.s_ipc_to, static_priv_id(RS_PROC_NR));
s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv));
if(s != OK) {
panic("unable to set privileges for the new RS instance: %d", s);
}
s = sys_privctl(replica_endpoint, SYS_PRIV_ALLOW, NULL);
if(s != OK) {
panic("unable to let the new RS instance run: %d", s);
}
if ((s = sched_init_proc(replica_rp)) != OK) {
panic("unable to initialize RS replica scheduling: %d", s);
}
/* Synchronize with the new instance and go to sleep. */
m.m_type = RS_INIT;
s = sendrec(replica_endpoint, &m);
s = sys_privctl(replica_endpoint, SYS_PRIV_YIELD, NULL);
if(s != OK) {
panic("sendrec failed: %d", s);
panic("unable to yield control to the new RS instance: %d", s);
}
/* Not reachable */
NOT_REACHABLE;
}
return(OK);
@ -690,7 +682,7 @@ endpoint_t endpoint;
/* Send a reply to unblock the service. */
m.m_type = OK;
reply(m.m_source, &m);
reply(m.m_source, rp, &m);
/* Mark the slot as no longer initializing. */
rp->r_flags &= ~RS_INITIALIZING;

View file

@ -100,11 +100,6 @@ struct rproc *rp;
return EPERM;
}
/* Disallow RS_UPDATE for RS. */
if(rpub->endpoint == RS_PROC_NR) {
if(call == RS_UPDATE) return EPERM;
}
/* Disallow the call if another call is in progress for the service. */
if(rp->r_flags & RS_LATEREPLY || rp->r_flags & RS_INITIALIZING) {
return EBUSY;
@ -278,7 +273,14 @@ PUBLIC void update_period(message *m_ptr)
/* Prepare cancel request. */
m.m_type = RS_LU_PREPARE;
m.RS_LU_STATE = SEF_LU_STATE_NULL;
asynsend(rpub->endpoint, &m);
if(rpub->endpoint == RS_PROC_NR) {
/* RS can process the request directly. */
do_sef_lu_request(&m);
}
else {
/* Send request message to the system service. */
asynsend(rpub->endpoint, &m);
}
}
}
@ -327,10 +329,8 @@ PUBLIC void end_update(int result, int reply_flag)
surviving_rp->r_flags &= ~RS_UPDATING;
if(reply_flag == RS_REPLY) {
message m;
if(rs_verbose)
printf("RS: %s being replied to\n", srv_to_string(surviving_rp));
m.m_type = result;
reply(surviving_rp->r_pub->endpoint, &m);
reply(surviving_rp->r_pub->endpoint, surviving_rp, &m);
}
/* Cleanup the version that has to die out. */
@ -382,7 +382,7 @@ struct rproc *rp;
/* RS should simply exit() directly. */
if(rpub->endpoint == RS_PROC_NR) {
exit(0);
exit(1);
}
return sys_kill(rpub->endpoint, SIGKILL);
@ -437,7 +437,8 @@ struct rproc *rp;
rpub = rp->r_pub;
use_copy= (rpub->sys_flags & SF_USE_COPY);
has_replica= (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED));
has_replica= (rp->r_old_rp
|| (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED)));
/* Do we need an existing replica to create the service? */
if(!has_replica && (rpub->sys_flags & SF_NEED_REPL)) {
@ -623,20 +624,15 @@ int instance_flag;
rs_flags = (ROOT_SYS_PROC | RST_SYS_PROC);
if((replica_rp->r_priv.s_flags & rs_flags) == rs_flags) {
rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
if(rs_verbose)
printf("RS: %s gets a backup signal manager\n", srv_to_string(rs_rp));
/* Update privilege structures. */
rs_rp->r_priv.s_bak_sig_mgr = replica_rpub->endpoint;
replica_rp->r_priv.s_sig_mgr = SELF;
r = sys_privctl(RS_PROC_NR, SYS_PRIV_UPDATE_SYS, &rs_rp->r_priv);
/* Update signal managers. */
r = update_sig_mgrs(rs_rp, SELF, replica_rpub->endpoint);
if(r == OK) {
r = sys_privctl(replica_rpub->endpoint, SYS_PRIV_UPDATE_SYS,
&replica_rp->r_priv);
r = update_sig_mgrs(replica_rp, SELF, NONE);
}
if(r != OK) {
*rp_link = NULL;
return kill_service(replica_rp, "sys_privctl call failed", r);
return kill_service(replica_rp, "update_sig_mgrs failed", r);
}
}
@ -829,9 +825,10 @@ PUBLIC void stop_service(struct rproc *rp,int how)
/*===========================================================================*
* update_service *
*===========================================================================*/
PUBLIC int update_service(src_rpp, dst_rpp)
PUBLIC int update_service(src_rpp, dst_rpp, swap_flag)
struct rproc **src_rpp;
struct rproc **dst_rpp;
int swap_flag;
{
/* Update an existing service. */
int r;
@ -851,10 +848,11 @@ struct rproc **dst_rpp;
printf("RS: %s updating into %s\n",
srv_to_string(src_rp), srv_to_string(dst_rp));
/* Swap the slots of the two processes. */
r = srv_update(src_rpub->endpoint, dst_rpub->endpoint);
if(r != OK) {
return r;
/* Swap the slots of the two processes when asked to. */
if(swap_flag == RS_SWAP) {
if((r = srv_update(src_rpub->endpoint, dst_rpub->endpoint)) != OK) {
return r;
}
}
/* Swap slots here as well. */
@ -933,7 +931,8 @@ PUBLIC void terminate_service(struct rproc *rp)
new_rp = rp;
old_rp = new_rp->r_old_rp;
new_rp->r_flags &= ~RS_INITIALIZING;
update_service(&new_rp, &old_rp); /* can't fail */
r = update_service(&new_rp, &old_rp, RS_SWAP);
assert(r == OK); /* can't fail */
end_update(ERESTART, RS_REPLY);
return;
}
@ -1072,7 +1071,7 @@ PUBLIC void restart_service(struct rproc *rp)
replica_rp = rp->r_next_rp;
/* Update the service into the replica. */
r = update_service(&rp, &replica_rp);
r = update_service(&rp, &replica_rp, RS_SWAP);
if(r != OK) {
kill_service(rp, "unable to update into new replica", r);
return;
@ -1085,9 +1084,6 @@ PUBLIC void restart_service(struct rproc *rp)
return;
}
/* Increase the number of restarts. */
replica_rp->r_restarts += 1;
if(rs_verbose)
printf("RS: %s restarted into %s\n",
srv_to_string(rp), srv_to_string(replica_rp));
@ -1129,7 +1125,7 @@ struct rproc *rp;
struct rproc ***rps;
int *length;
{
/* Retrieve all the service instances of a give service. */
/* Retrieve all the service instances of a given service. */
static struct rproc *instances[5];
int nr_instances;
@ -1573,6 +1569,10 @@ struct rproc **clone_rpp;
if(clone_rpub->sys_flags & SF_USE_COPY) {
share_exec(clone_rp, rp); /* share exec image */
}
clone_rp->r_old_rp = NULL; /* no old version yet */
clone_rp->r_new_rp = NULL; /* no new version yet */
clone_rp->r_prev_rp = NULL; /* no prev replica yet */
clone_rp->r_next_rp = NULL; /* no next replica yet */
/* Force dynamic privilege id. */
clone_rp->r_priv.s_flags |= DYN_PRIV_ID;

View file

@ -58,7 +58,7 @@ _PROTOTYPE( int run_service, (struct rproc *rp, int init_type) );
_PROTOTYPE( int start_service, (struct rproc *rp) );
_PROTOTYPE( void stop_service, (struct rproc *rp,int how) );
_PROTOTYPE( int update_service, (struct rproc **src_rpp,
struct rproc **dst_rpp) );
struct rproc **dst_rpp, int swap_flag) );
_PROTOTYPE( void activate_service, (struct rproc *rp, struct rproc *ex_rp) );
_PROTOTYPE( void terminate_service, (struct rproc *rp));
_PROTOTYPE( void restart_service, (struct rproc *rp) );
@ -92,10 +92,12 @@ _PROTOTYPE( int init_service, (struct rproc *rp, int type));
_PROTOTYPE(void fill_call_mask, ( int *calls, int tot_nr_calls,
bitchunk_t *call_mask, int call_base, int is_init));
_PROTOTYPE( char* srv_to_string, (struct rproc *rp));
_PROTOTYPE( void reply, (endpoint_t who, message *m_ptr));
_PROTOTYPE( void reply, (endpoint_t who, struct rproc *rp, message *m_ptr));
_PROTOTYPE( void late_reply, (struct rproc *rp, int code));
_PROTOTYPE( int rs_isokendpt, (endpoint_t endpoint, int *proc));
_PROTOTYPE( int sched_init_proc, (struct rproc *rp));
_PROTOTYPE( int update_sig_mgrs, (struct rproc *rp, endpoint_t sig_mgr,
endpoint_t bak_sig_mgr));
/* error.c */
_PROTOTYPE( char * init_strerror, (int errnum) );

View file

@ -363,22 +363,30 @@ PUBLIC int do_shutdown(message *m_ptr)
PUBLIC int do_init_ready(message *m_ptr)
{
int who_p;
message m;
struct rproc *rp;
struct rprocpub *rpub;
int result;
int result, is_rs;
int r;
is_rs = (m_ptr->m_source == RS_PROC_NR);
who_p = _ENDPOINT_P(m_ptr->m_source);
result = m_ptr->RS_INIT_RESULT;
/* Check for RS failing initialization first. */
if(is_rs && result != OK) {
return result;
}
rp = rproc_ptr[who_p];
rpub = rp->r_pub;
result = m_ptr->RS_INIT_RESULT;
/* Make sure the originating service was requested to initialize. */
if(! (rp->r_flags & RS_INITIALIZING) ) {
if(rs_verbose)
printf("RS: do_init_ready: got unexpected init ready msg from %d\n",
m_ptr->m_source);
return(EDONTREPLY);
return EINVAL;
}
/* Check if something went wrong and the service failed to init.
@ -389,7 +397,7 @@ PUBLIC int do_init_ready(message *m_ptr)
printf("RS: %s initialization error: %s\n", srv_to_string(rp),
init_strerror(result));
crash_service(rp); /* simulate crash */
return(EDONTREPLY);
return EDONTREPLY;
}
/* Mark the slot as no longer initializing. */
@ -397,6 +405,10 @@ PUBLIC int do_init_ready(message *m_ptr)
rp->r_check_tm = 0;
getuptime(&rp->r_alive_tm);
/* Reply and unblock the service before doing anything else. */
m.m_type = OK;
reply(rpub->endpoint, rp, &m);
/* See if a late reply has to be sent. */
late_reply(rp, OK);
@ -417,6 +429,7 @@ PUBLIC int do_init_ready(message *m_ptr)
if(rp->r_prev_rp) {
cleanup_service(rp->r_prev_rp);
rp->r_prev_rp = NULL;
rp->r_restarts += 1;
if(rs_verbose)
printf("RS: %s completed restart\n", srv_to_string(rp));
@ -429,7 +442,7 @@ PUBLIC int do_init_ready(message *m_ptr)
}
}
return(OK);
return is_rs ? OK : EDONTREPLY; /* return what the caller expects */
}
/*===========================================================================*
@ -563,19 +576,42 @@ PUBLIC int do_update(message *m_ptr)
if(rs_verbose)
printf("RS: %s updating\n", srv_to_string(rp));
/* Request to update. */
m_ptr->m_type = RS_LU_PREPARE;
asynsend3(rpub->endpoint, m_ptr, AMF_NOREPLY);
/* If RS is updating, set up signal managers for the new instance.
* The current RS instance must be made the backup signal manager to
* support rollback in case of a crash during initialization.
*/
if(rp->r_priv.s_flags & ROOT_SYS_PROC) {
new_rp = rp->r_new_rp;
/* Unblock the caller immediately if requested. */
if(noblock) {
return OK;
s = update_sig_mgrs(new_rp, SELF, new_rp->r_pub->endpoint);
if(s != OK) {
cleanup_service(new_rp);
return s;
}
}
/* Late reply - send a reply when the new version completes initialization. */
rp->r_flags |= RS_LATEREPLY;
rp->r_caller = m_ptr->m_source;
rp->r_caller_request = RS_UPDATE;
if(noblock) {
/* Unblock the caller immediately if requested. */
m_ptr->m_type = OK;
reply(m_ptr->m_source, NULL, m_ptr);
}
else {
/* Send a reply when the new version completes initialization. */
rp->r_flags |= RS_LATEREPLY;
rp->r_caller = m_ptr->m_source;
rp->r_caller_request = RS_UPDATE;
}
/* Request to update. */
m_ptr->m_type = RS_LU_PREPARE;
if(rpub->endpoint == RS_PROC_NR) {
/* RS can process the request directly. */
do_sef_lu_request(m_ptr);
}
else {
/* Send request message to the system service. */
asynsend3(rpub->endpoint, m_ptr, AMF_NOREPLY);
}
return EDONTREPLY;
}
@ -588,18 +624,20 @@ PUBLIC int do_upd_ready(message *m_ptr)
struct rproc *rp, *old_rp, *new_rp;
int who_p;
int result;
int is_rs;
int r;
who_p = _ENDPOINT_P(m_ptr->m_source);
rp = rproc_ptr[who_p];
result = m_ptr->RS_LU_RESULT;
is_rs = (m_ptr->m_source == RS_PROC_NR);
/* Make sure the originating service was requested to prepare for update. */
if(rp != rupdate.rp) {
if(rs_verbose)
printf("RS: do_upd_ready: got unexpected update ready msg from %d\n",
m_ptr->m_source);
return(EINVAL);
return EINVAL;
}
/* Check if something went wrong and the service failed to prepare
@ -610,13 +648,31 @@ PUBLIC int do_upd_ready(message *m_ptr)
end_update(result, RS_REPLY);
printf("RS: update failed: %s\n", lu_strerror(result));
return EDONTREPLY;
return is_rs ? result : EDONTREPLY; /* return what the caller expects */
}
old_rp = rp;
new_rp = rp->r_new_rp;
/* If RS itself is updating, yield control to the new version immediately. */
if(is_rs) {
r = init_service(new_rp, SEF_INIT_LU);
if(r != OK) {
panic("unable to initialize the new RS instance: %d", r);
}
r = sys_privctl(new_rp->r_pub->endpoint, SYS_PRIV_YIELD, NULL);
if(r != OK) {
panic("unable to yield control to the new RS instance: %d", r);
}
/* If we get this far, the new version failed to initialize. Rollback. */
r = srv_update(RS_PROC_NR, new_rp->r_pub->endpoint);
assert(r == OK); /* can't fail */
end_update(ERESTART, RS_REPLY);
return ERESTART;
}
/* Perform the update. */
old_rp = rp;
new_rp = rp->r_new_rp;
r = update_service(&old_rp, &new_rp);
r = update_service(&old_rp, &new_rp, RS_SWAP);
if(r != OK) {
end_update(r, RS_REPLY);
printf("RS: update failed: error %d\n", r);
@ -626,7 +682,9 @@ PUBLIC int do_upd_ready(message *m_ptr)
/* Let the new version run. */
r = run_service(new_rp, SEF_INIT_LU);
if(r != OK) {
update_service(&new_rp, &old_rp); /* rollback, can't fail. */
/* Something went wrong. Rollback. */
r = update_service(&new_rp, &old_rp, RS_SWAP);
assert(r == OK); /* can't fail */
end_update(r, RS_REPLY);
printf("RS: update failed: error %d\n", r);
return EDONTREPLY;
@ -752,13 +810,13 @@ PUBLIC void do_sigchld()
* free slots for all the service instances and send a late
* reply if necessary.
*/
get_service_instances(rp, &rps, &nr_rps);
for(i=0;i<nr_rps;i++) {
if(rupdate.flags & RS_UPDATING) {
rupdate.flags &= ~RS_UPDATING;
}
free_slot(rps[i]);
}
get_service_instances(rp, &rps, &nr_rps);
for(i=0;i<nr_rps;i++) {
if(rupdate.flags & RS_UPDATING) {
rupdate.flags &= ~RS_UPDATING;
}
free_slot(rps[i]);
}
}
}
}

View file

@ -27,6 +27,11 @@ int type; /* type of initialization */
rp->r_flags |= RS_INITIALIZING; /* now initializing */
rp->r_check_tm = rp->r_alive_tm + 1; /* expect reply within period */
/* In case of RS initialization, we are done. */
if(rp->r_priv.s_flags & ROOT_SYS_PROC) {
return OK;
}
/* Determine the old endpoint if this is a new instance. */
old_endpoint = NONE;
if(rp->r_old_rp) {
@ -129,12 +134,21 @@ struct rproc *rp; /* pointer to process slot */
/*===========================================================================*
* reply *
*===========================================================================*/
PUBLIC void reply(who, m_ptr)
PUBLIC void reply(who, rp, m_ptr)
endpoint_t who; /* replyee */
struct rproc *rp; /* replyee slot (if any) */
message *m_ptr; /* reply message */
{
int r; /* send status */
/* No need to actually reply to RS */
if(who == RS_PROC_NR) {
return;
}
if(rs_verbose && rp)
printf("RS: %s being replied to\n", srv_to_string(rp));
r = sendnb(who, m_ptr); /* send the message */
if (r != OK)
printf("RS: unable to send reply to %d: %d\n", who, r);
@ -159,7 +173,7 @@ int code; /* status code */
printf("RS: %s late reply %d to %d for request %d\n",
srv_to_string(rp), code, rp->r_caller, rp->r_caller_request);
reply(rp->r_caller, &m);
reply(rp->r_caller, NULL, &m);
rp->r_flags &= ~RS_LATEREPLY;
}
}
@ -212,3 +226,41 @@ PUBLIC int sched_init_proc(struct rproc *rp)
return OK;
}
/*===========================================================================*
* update_sig_mgrs *
*===========================================================================*/
PUBLIC int update_sig_mgrs(struct rproc *rp, endpoint_t sig_mgr,
  endpoint_t bak_sig_mgr)
{
/* Set the signal manager and the backup signal manager of a service.
 *
 * rp          - slot of the service whose privilege structure is updated
 * sig_mgr     - new signal manager (SELF is printed as the service's own
 *               endpoint in verbose mode)
 * bak_sig_mgr - new backup signal manager (NONE is printed as -1)
 *
 * The local copy of the privilege structure is first refreshed with
 * sys_getpriv(), then the two signal manager fields are changed and the
 * structure is pushed back with SYS_PRIV_UPDATE_SYS. Returns OK on success
 * or the error code of the call that failed.
 */
  int r;
  struct rprocpub *rpub;

  rpub = rp->r_pub;

  if(rs_verbose)
      printf("RS: %s updates signal managers: %d%s / %d\n", srv_to_string(rp),
          sig_mgr == SELF ? rpub->endpoint : sig_mgr,
          sig_mgr == SELF ? "(SELF)" : "",
          bak_sig_mgr == NONE ? -1 : bak_sig_mgr);

  /* Synch privilege structure with the kernel. */
  if ((r = sys_getpriv(&rp->r_priv, rpub->endpoint)) != OK) {
      /* Terminate the diagnostic with a newline so it does not run into
       * whatever is printed next.
       */
      printf("unable to synch privilege structure: %d\n", r);
      return r;
  }

  /* Set signal managers. */
  rp->r_priv.s_sig_mgr = sig_mgr;
  rp->r_priv.s_bak_sig_mgr = bak_sig_mgr;

  /* Update privilege structure. */
  r = sys_privctl(rpub->endpoint, SYS_PRIV_UPDATE_SYS, &rp->r_priv);
  if(r != OK) {
      printf("unable to update privilege structure: %d\n", r);
      return r;
  }

  return OK;
}