Don't always assume NOPINGREPLY as a failure in RS

This commit is contained in:
Cristiano Giuffrida 2010-07-20 01:50:33 +00:00
parent 16d0609fad
commit af424b4e43
3 changed files with 41 additions and 3 deletions

View file

@ -1730,6 +1730,32 @@ PUBLIC struct rproc* lookup_slot_by_dev_nr(dev_t dev_nr)
return NULL;
}
/*===========================================================================*
* lookup_slot_by_flags *
*===========================================================================*/
PUBLIC struct rproc* lookup_slot_by_flags(int flags)
{
/* Lookup a service slot matching the given flags. */
int slot_nr;
struct rproc *rp;
if(!flags) {
return NULL;
}
for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
rp = &rproc[slot_nr];
if (!(rp->r_flags & RS_IN_USE)) {
continue;
}
if (rp->r_flags & flags) {
return rp;
}
}
return NULL;
}
/*===========================================================================*
* alloc_slot *
*===========================================================================*/

View file

@ -78,6 +78,7 @@ _PROTOTYPE( void swap_slot, (struct rproc **src_rpp, struct rproc **dst_rpp) );
_PROTOTYPE( struct rproc* lookup_slot_by_label, (char *label) );
_PROTOTYPE( struct rproc* lookup_slot_by_pid, (pid_t pid) );
_PROTOTYPE( struct rproc* lookup_slot_by_dev_nr, (dev_t dev_nr) );
/* Returns the first in-use slot whose r_flags has any bit of 'flags' set,
 * or NULL if there is no such slot or 'flags' is 0.
 */
_PROTOTYPE( struct rproc* lookup_slot_by_flags, (int flags) );
_PROTOTYPE( int alloc_slot, (struct rproc **rpp) );
_PROTOTYPE( void free_slot, (struct rproc *rp) );
_PROTOTYPE( char *get_next_label, (char *ptr, char *label, char *caller_label));

View file

@ -781,14 +781,25 @@ message *m_ptr;
/* Check if an answer to a status request is still pending. If
* the service didn't respond within time, kill it to simulate
* a crash. The failure will be detected and the service will
* be restarted automatically.
* be restarted automatically. Give the service a free pass if
* somebody is initializing. There may be some weird dependencies
* if another service is, for example, restarting at the same
* time.
*/
if (rp->r_alive_tm < rp->r_check_tm) {
if (now - rp->r_alive_tm > 2*period &&
rp->r_pid > 0 && !(rp->r_flags & RS_NOPINGREPLY)) {
if(rs_verbose)
printf("RS: %s reported late\n",
srv_to_string(rp));
printf("RS: %s reported late\n", srv_to_string(rp));
if(lookup_slot_by_flags(RS_INITIALIZING)) {
/* Skip for now. */
if(rs_verbose)
printf("RS: %s gets a free pass\n",
srv_to_string(rp));
rp->r_alive_tm = now;
rp->r_check_tm = now+1;
continue;
}
rp->r_flags |= RS_NOPINGREPLY;
crash_service(rp); /* simulate crash */
}