Cristiano Giuffrida 1f5841c8ed Basic System Event Framework (SEF) with ping and live update.
- SEF must be used by every system process and is thereby part of the system
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system event messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
  1. SEF Ping. The event occurs every time RS sends a ping to figure out
  whether a system process is still alive. The default callback implementation
  provided by SEF is to notify RS back to let it know the process is alive
  and kicking.
  2. SEF Live update. The event occurs every time RS sends a prepare-to-update
  message to let a system process know an update is available and to prepare
  for it. The live update support is very basic for now. SEF only deals with
  verifying if the prepare state can be supported by the process, dumping the
  state for debugging purposes, and providing an event-driven programming
  model to the process to react to state changes and check in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
  * PM notify messages primarily used at shutdown.
  * SYSTEM notify messages primarily used for signals.
  * CLOCK notify messages used for system alarms.
  * Debug messages. IS could still be in charge of fkey handling but would
  forward the debug message to the target process (e.g. PM, if the user
  requested debug information about PM). SEF would then catch the message and
  do nothing unless the process has registered an appropriate callback to
  deal with the event. This simplifies the programming model to print debug
  information, avoids duplicating code, and reduces the effort to print
  debug information.

- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.

- RS supports a basic single-component live update protocol now, as follows:
  * When an update command is issued (via "service update *"), RS notifies the
  target system process to prepare for a specific update state.
  * If the process doesn't respond back in time, the update is aborted.
  * When the process responds back, RS kills it and marks it for refreshing.
  * The process is then automatically restarted as for a buggy process and can
  start running again.
  * Live update is currently prototyped as a controlled failure.
2009-12-21 14:12:21 +00:00

245 lines
8.2 KiB

#include "syslib.h"
#include <assert.h>
#include <minix/sysutil.h>
/* SEF Live update variables. */
/* Current live update state of this process. It starts as SEF_LU_STATE_NULL,
 * is set from the request message in do_sef_lu_request(), and is reset to
 * SEF_LU_STATE_NULL in sef_lu_ready().
 */
PRIVATE int sef_lu_state = SEF_LU_STATE_NULL;
/* SEF Live update callbacks.
 * One slot per live update event. Each slot is overwritten by the matching
 * sef_setcb_lu_*() function in this file.
 * NOTE(review): the initializer list that follows "sef_cbs = {" is not
 * visible in this extract -- confirm the default values against the complete
 * file.
 */
PRIVATE struct sef_cbs {
/* NOTE(review): not invoked in the visible code -- confirm usage. */
sef_cb_lu_prepare_t sef_cb_lu_prepare;
/* Called by do_sef_lu_request() with the requested state. */
sef_cb_lu_state_isvalid_t sef_cb_lu_state_isvalid;
/* Called with (old_state, new_state) whenever sef_lu_state changes. */
sef_cb_lu_state_changed_t sef_cb_lu_state_changed;
/* NOTE(review): not invoked in the visible code -- confirm usage. */
sef_cb_lu_state_dump_t sef_cb_lu_state_dump;
/* Called by sef_lu_ready() with the result before RS is informed. */
sef_cb_lu_ready_pre_t sef_cb_lu_ready_pre;
} sef_cbs = {
/* SEF Live update prototypes for sef_receive(). */
PUBLIC _PROTOTYPE( void do_sef_lu_before_receive, (void) );
PUBLIC _PROTOTYPE( int do_sef_lu_request, (message *m_ptr) );
/* Debug. */
/* sef_debug_header() is declared EXTERN here; its return value is passed as
 * the leading "%s" argument of the sef_lu_dprint() calls in this file.
 */
EXTERN _PROTOTYPE( char* sef_debug_header, (void) );
/* Counter printed as "cycle=%d" in debug output; do_sef_lu_request() resets
 * it to 0.
 */
PRIVATE int sef_lu_debug_cycle = 0;
/* do_sef_lu_before_receive: live update hook declared above among the
 * "prototypes for sef_receive()".
 * The visible code compares sef_lu_state against SEF_LU_STATE_NULL (nothing to
 * do when no update is being prepared), prints a debug line with the current
 * debug cycle, and then branches on whether the state is
 * SEF_LU_STATE_WORK_FREE.
 * NOTE(review): this extract has lost lines (function braces, the bodies of
 * the if/else branches, and the terminator of the last comment) -- confirm the
 * actual behavior of each branch against the complete file.
 */
* do_sef_lu_before_receive *
PUBLIC void do_sef_lu_before_receive()
/* Handle SEF Live update before receive events. */
/* Nothing to do if we are not preparing for a live update. */
if(sef_lu_state == SEF_LU_STATE_NULL) {
/* Debug. */
sef_lu_dprint("%s, cycle=%d. Dumping state variables:\n",
sef_debug_header(), sef_lu_debug_cycle);
/* Let the callback code handle the event.
* For SEF_LU_STATE_WORK_FREE, we're always ready, tell immediately.
if(sef_lu_state == SEF_LU_STATE_WORK_FREE) {
else {
/* do_sef_lu_request: handle a live update request message.
 * m_ptr: request message; the requested state is read from
 *        m_ptr->RS_LU_STATE.
 * The visible code resets the debug cycle counter, remembers the current
 * state, and asks the registered state_isvalid callback whether the requested
 * state is acceptable. In the else-branch it stores the requested state in
 * sef_lu_state and, if that differs from the previous state, invokes the
 * state_changed callback with (old_state, new state).
 * NOTE(review): this extract has lost lines (function braces, the body of the
 * !is_valid_state branch, a comment terminator, and the return statement) --
 * confirm against the complete file.
 */
* do_sef_lu_request *
PUBLIC int do_sef_lu_request(message *m_ptr)
/* Handle a SEF Live update request. */
int old_state, is_valid_state;
sef_lu_debug_cycle = 0;
old_state = sef_lu_state;
/* Only accept live update requests with a valid state. */
is_valid_state = sef_cbs.sef_cb_lu_state_isvalid(m_ptr->RS_LU_STATE);
if(!is_valid_state) {
else {
/* Set the new live update state. */
sef_lu_state = m_ptr->RS_LU_STATE;
/* If the live update state changed, let the callback code
* handle the rest.
if(old_state != sef_lu_state) {
sef_cbs.sef_cb_lu_state_changed(old_state, sef_lu_state);
/* Return OK not to let anybody else intercept the request. */
/* sef_lu_ready: report that this process is ready for the pending live
 * update.
 * result: outcome to report; the debug output labels OK as "(OK)".
 * The visible code first runs the registered ready_pre callback; a non-OK
 * return value replaces 'result'. Otherwise it fills an RS_LU_PREPARE message
 * with sef_lu_state and 'result', sends it to RS_PROC_NR with sendrec(), and
 * panics if sendrec() fails. The tail prints that the update was aborted,
 * resets sef_lu_state to SEF_LU_STATE_NULL and invokes the state_changed
 * callback when the state actually changed.
 * NOTE(review): this extract has lost lines (function and branch braces, a
 * comment terminator, possibly debug guards) -- confirm exact control flow
 * against the complete file.
 */
* sef_lu_ready *
PUBLIC void sef_lu_ready(int result)
message m;
int old_state, r;
sef_lu_dprint("%s, cycle=%d. Ready to update with result: %d%s\n",
sef_debug_header(), sef_lu_debug_cycle,
result, (result == OK ? "(OK)" : ""));
/* Let the callback code perform any pre-ready operations. */
r = sef_cbs.sef_cb_lu_ready_pre(result);
if(r != OK) {
/* Abort update if callback returned error. */
result = r;
else {
/* Inform RS that we're ready with the given result. */
m.m_type = RS_LU_PREPARE;
m.RS_LU_STATE = sef_lu_state;
m.RS_LU_RESULT = result;
r = sendrec(RS_PROC_NR, &m);
if ( r != OK) {
panic("SEF", "sendrec failed", r);
sef_lu_dprint("%s, cycle=%d. The %s aborted the update!\n",
sef_debug_header(), sef_lu_debug_cycle,
(result == OK ? "server" : "client"));
/* Something went wrong. Update was aborted and we didn't get updated.
* Restore things back to normal and continue executing.
old_state = sef_lu_state;
sef_lu_state = SEF_LU_STATE_NULL;
if(old_state != sef_lu_state) {
sef_cbs.sef_cb_lu_state_changed(old_state, sef_lu_state);
* sef_setcb_lu_prepare *
PUBLIC void sef_setcb_lu_prepare(sef_cb_lu_prepare_t cb)
assert(cb != NULL);
sef_cbs.sef_cb_lu_prepare = cb;
* sef_setcb_lu_state_isvalid *
PUBLIC void sef_setcb_lu_state_isvalid(sef_cb_lu_state_isvalid_t cb)
assert(cb != NULL);
sef_cbs.sef_cb_lu_state_isvalid = cb;
* sef_setcb_lu_state_changed *
PUBLIC void sef_setcb_lu_state_changed(sef_cb_lu_state_changed_t cb)
assert(cb != NULL);
sef_cbs.sef_cb_lu_state_changed = cb;
* sef_setcb_lu_state_dump *
PUBLIC void sef_setcb_lu_state_dump(sef_cb_lu_state_dump_t cb)
assert(cb != NULL);
sef_cbs.sef_cb_lu_state_dump = cb;
* sef_setcb_lu_ready_pre *
PUBLIC void sef_setcb_lu_ready_pre(sef_cb_lu_ready_pre_t cb)
assert(cb != NULL);
sef_cbs.sef_cb_lu_ready_pre = cb;
/* sef_cb_lu_prepare_null: predefined "prepare" callback taking the live
 * update state.
 * NOTE(review): no function body is visible in this extract; the "_null"
 * suffix suggests a no-op -- confirm against the complete file.
 */
* sef_cb_lu_prepare_null *
PUBLIC void sef_cb_lu_prepare_null(int state)
* sef_cb_lu_state_isvalid_null *
PUBLIC int sef_cb_lu_state_isvalid_null(int state)
return FALSE;
/* sef_cb_lu_state_changed_null: predefined "state changed" callback taking
 * the old and the new live update state.
 * NOTE(review): no function body is visible in this extract; the "_null"
 * suffix suggests a no-op -- confirm against the complete file.
 */
* sef_cb_lu_state_changed_null *
PUBLIC void sef_cb_lu_state_changed_null(int old_state, int state)
/* sef_cb_lu_state_dump_null: predefined "state dump" callback taking the
 * live update state.
 * NOTE(review): no function body is visible in this extract; the "_null"
 * suffix suggests a no-op -- confirm against the complete file.
 */
* sef_cb_lu_state_dump_null *
PUBLIC void sef_cb_lu_state_dump_null(int state)
/* sef_cb_lu_ready_pre_null: predefined "ready pre" callback taking the result
 * about to be reported and returning an int.
 * NOTE(review): no function body is visible in this extract, so the returned
 * value cannot be determined here -- confirm against the complete file.
 */
* sef_cb_lu_ready_pre_null *
PUBLIC int sef_cb_lu_ready_pre_null(int result)
/* sef_cb_lu_prepare_always_ready: predefined "prepare" callback taking the
 * live update state.
 * NOTE(review): no function body is visible in this extract; the name
 * suggests it reports readiness immediately -- confirm against the complete
 * file.
 */
* sef_cb_lu_prepare_always_ready *
PUBLIC void sef_cb_lu_prepare_always_ready(int state)
* sef_cb_lu_state_isvalid_standard *
PUBLIC int sef_cb_lu_state_isvalid_standard(int state)