Improve asynsend support for process swapping
This resolves various system stalls while running testrelpol.

Change-Id: Ie70fc2dbcdb0a8c9e3800cc0df564be747e111ec
This commit is contained in:
parent
5105ab554b
commit
7f79fb8810
3 changed files with 34 additions and 0 deletions
|
@ -120,6 +120,8 @@ int do_privctl(struct proc * caller, message * m_ptr)
|
|||
priv(rp)->s_proc_nr = proc_nr; /* reassociate process nr */
|
||||
|
||||
for (i=0; i< NR_SYS_CHUNKS; i++) /* remove pending: */
|
||||
priv(rp)->s_asyn_pending.chunk[i] = 0; /* - incoming asyn */
|
||||
for (i=0; i< NR_SYS_CHUNKS; i++) /* messages */
|
||||
priv(rp)->s_notify_pending.chunk[i] = 0; /* - notifications */
|
||||
priv(rp)->s_int_pending = 0; /* - interrupts */
|
||||
(void) sigemptyset(&priv(rp)->s_sig_pending); /* - signals */
|
||||
|
|
|
@ -286,6 +286,7 @@ static void adjust_priv_slot(struct priv *privp, struct priv *from_privp)
|
|||
{
|
||||
/* Preserve privilege ids and non-privilege stuff in the priv structure. */
|
||||
privp->s_id = from_privp->s_id;
|
||||
privp->s_asyn_pending = from_privp->s_asyn_pending;
|
||||
privp->s_notify_pending = from_privp->s_notify_pending;
|
||||
privp->s_int_pending = from_privp->s_int_pending;
|
||||
privp->s_sig_pending = from_privp->s_sig_pending;
|
||||
|
|
|
@ -196,6 +196,37 @@ static void sef_lu_ready(int result)
|
|||
* Restore things back to normal and continue executing.
|
||||
*/
|
||||
sef_lu_state_change(SEF_LU_STATE_NULL, 0);
|
||||
|
||||
/* Transfer of asynsend tables during live update is messy at best. The
|
||||
* general idea is that the asynsend table is preserved during live update,
|
||||
* so that messages never get lost. That means that 1) the new instance
|
||||
* takes over the table from the old instance upon live update, and 2) the
|
||||
* old instance takes over the table on rollback. Case 1 is not atomic:
|
||||
* the new instance starts with no asynsend table, and after swapping slots,
|
||||
* the old instance's table will no longer be looked at by the kernel. The
|
||||
* new instance copies over the table from the old instance, and then calls
|
||||
* senda_reload() to tell the kernel about the new location of the otherwise
|
||||
* preserved table. Case 2 is different: the old instance cannot copy the
|
||||
* table from the new instance, and so the kernel does that part, based on
|
||||
* the table provided through the new instance's senda_reload(). However, if
|
||||
* the new instance never got to the senda_reload() call, then the kernel
|
||||
* also would not have been able to deliver any messages, and so the old
|
||||
* instance's table can still be used as is. Now the problem. Because case 1
|
||||
* is not atomic, there is a small window during which other processes may
|
||||
* attempt to receive a message, based on the fact that their s_asyn_pending
|
||||
* mask in the kernel has a bit set for the process being updated. Failing
|
||||
* to find a matching message in the yet-missing table of the new process,
|
||||
* the kernel will unset the s_asyn_pending bit. Now, normally the bit would
|
||||
* be set again through the new instance's senda_reload() call. However, if
|
||||
* the new instance rolls back instead, the old instance will have a message
|
||||
* for the other process, but its s_asyn_pending bit will not be set. Thus,
|
||||
* the message will never be delivered unless we call senda_reload() here.
|
||||
* XXX TODO: the story is even more complicated, because based on the above
|
||||
* story, copying back the table should never be necessary and never happen.
|
||||
* My logs show it does happen for at least RS, which may indicate RS sends
|
||||
* asynchronous messages in its initialization code. -dcvmoole
|
||||
*/
|
||||
senda_reload();
|
||||
}
|
||||
|
||||
/*===========================================================================*
|
||||
|
|
Loading…
Reference in a new issue