#!/bin/sh

# Idea:
# Iterate over all the /proc/service entries, and
#   for each restartability policy call the policy test function if it is
#   supported. No accounting of failed / successful test is done, as a
#   failed test can currently provoke cascading effects, so instead we
#   fail the test as a whole on the first failure found. Live update tests
#   are currently policy-agnostic.
#
# If arguments are given, use these instead of all entries found in
# /proc/service. Full paths have to be provided on the command line, like
#   /usr/tests/minix/testrelpol /proc/service/vfs
# to test vfs recovery only. In that case only the policy tests are run;
# the live update tests are skipped.
#
# Supported policies have to be in the POLICIES variable, and define a test
# function.
#
# Known limitations:
#  - Currently not all recovery policies are tested
#  - Running this test under X11 hangs the X server
#  - Live update tests do not test rollback situations
#
# To add a new policy NAME, you have to do the following:
#   1. Add the policy into the active policies list by:
#      POLICIES="${POLICIES} NAME"
#
#   2. define the following shell function:
#      pol_NAME() {}
#      - it will receive the following parameters:
#        + service filename as $1 : the full path to the proc entry
#        + label as $2            : the service label
#      - which prints 'ok' on success, 'not ok' on failure.

# Currently known policies:
#                   /*     user    |  endpoint  */
#   POL_RESET,      /*   visible   |   change   */
#   POL_RESTART     /* transparent | preserved  */

#######################################################################
# Utility functions & global state initializations
#######################################################################
POLICIES=""
MAX_RETRY=7            # so that a single test takes at most 10 seconds
MAX_MULTI_LU_RETRY=3   # how many times should we retry after bad luck?

# get_value(key, filename, noerror)
#
# Print the second ':'-separated field of the line(s) of <filename> that
# match <key> (used as a grep pattern).
#   $1 - key to look for
#   $2 - full path to the /proc/service entry
#   $3 - optional; when non-empty, do not report a grep error on stderr
# When grep itself fails (exit status 2, e.g. the entry cannot be read),
# nothing is printed on stdout and, unless $3 is set, an error is printed
# on stderr.
get_value() {
    local value result

    value=$(grep "$1" "$2" 2>/dev/null)
    result=$?

    if [ "${result}" -ne 2 ]; then
        # ${value} is left unquoted on purpose: the output format (runs of
        # whitespace collapsed by word splitting) is what callers get today.
        echo ${value} | cut -d: -f2
    else
        test -z "$3" && echo "Error: service $2 down" >&2
    fi
}

# wait_for_service(filename, restarts)
#
# Poll the "restarts" value of <filename> until it differs from <restarts>.
#   $1 - full path to the /proc/service entry
#   $2 - restart counter observed before the fault
# Returns 0 as soon as the counter changed, 1 after MAX_RETRY attempts.
wait_for_service() {
    local retry value

    retry=0

    # Arbitrary timeout, found by counting the number of mice crossing
    # the hallway.
    sleep 2
    while [ "${retry}" -lt "${MAX_RETRY}" ]; do
        sleep 1
        retry=$((${retry} + 1))
        # The service might momentarily disappear from the list.
        value=$(get_value restarts "$1" noerror)
        if [ -n "${value}" ] && [ "${value}" -ne "$2" ]; then
            return 0
        fi
    done
    return 1
}

#######################################################################
# Service management routines
#######################################################################

# prepare_service(filename, label)
#
# Request a clone of the service when its flags contain 'r' but not 'R'.
#   $1 - full path to the /proc/service entry
#   $2 - service label
# Returns 1 when "service clone" was issued (the caller is then expected to
# call cleanup_service afterwards), 0 when nothing was done.
prepare_service() {
    local label service flags

    service=$1
    label=$2

    flags=$(get_value flags "${service}")
    case "${flags}" in
    *R*) return 0 ;;
    *r*) ;;
    *) return 0 ;;
    esac

    service clone "${label}"
    return 1
}

# cleanup_service(label)
#
# Undo prepare_service by issuing "service unclone" for <label>.
cleanup_service() {
    local label

    label=$1

    service unclone "${label}"
}

# check_recovery(filename, label, endpoint_op)
#
# Shared body of the policy tests: record the restart counter and the
# endpoint, run "service fi" on the label (presumably fault injection --
# confirm against service(8)), wait for the restart counter to change and
# compare the values before and after.
#   $1 - full path to the /proc/service entry
#   $2 - service label
#   $3 - test(1) operator applied to (endpoint after, endpoint before):
#        -eq when the endpoint must be preserved, -ne when it must change
# Prints 'ok' when the restart counter increased and the endpoint
# comparison holds, 'not ok' otherwise.
check_recovery() {
    local label service endpoint_op
    local endpoint_pre endpoint_post
    local restarts_pre restarts_post

    service=$1
    label=$2
    endpoint_op=$3

    restarts_pre=$(get_value restarts "${service}")
    endpoint_pre=$(get_value endpoint "${service}")

    service fi "${label}"
    if ! wait_for_service "${service}" "${restarts_pre}"; then
        echo not ok
        return
    fi

    restarts_post=$(get_value restarts "${service}")
    endpoint_post=$(get_value endpoint "${service}")

    # Operands are quoted so that empty values make the test fail instead
    # of collapsing into a different (string) test.
    if [ "${restarts_post}" -gt "${restarts_pre}" ] &&
        [ "${endpoint_post}" "${endpoint_op}" "${endpoint_pre}" ]; then
        echo ok
    else
        echo not ok
    fi
}

#######################################################################
# POLICY: restart
#######################################################################
POLICIES="${POLICIES} restart"

# pol_restart(filename, label)
#
# The service must come back with a higher restart count and the same
# endpoint. Prints 'ok' or 'not ok'.
pol_restart() {
    check_recovery "$1" "$2" -eq
}

#######################################################################
# POLICY: reset
#######################################################################
POLICIES="${POLICIES} reset"

# pol_reset(filename, label)
#
# The service must come back with a higher restart count and a different
# endpoint. Prints 'ok' or 'not ok'.
#
# This policy doesn't guarantee the endpoint to be kept, but there
# is a slight chance that it will actually stay the same, and fail
# the test.
pol_reset() {
    check_recovery "$1" "$2" -ne
}

#######################################################################
# Live update tests
#######################################################################

# lu_test_one(label, prog, result)
#
# Run a single "service update" and compare its exit status.
#   $1 - service label
#   $2 - program to update to
#   $3 - expected exit status of the service command
# Optional settings are read from the variables lu_opts (default: empty),
# lu_maxtime (default: 3HZ) and lu_state (default: 1); callers set them as
# one-shot prefix assignments on the call. They are only read here, never
# written back.
# Returns 0 when the exit status matches, 1 otherwise.
lu_test_one() {
    local label prog result
    local opts maxtime state

    label=$1
    prog=$2
    result=$3
    opts=${lu_opts:-}
    maxtime=${lu_maxtime:-3HZ}
    state=${lu_state:-1}

    # ${opts} is unquoted on purpose so that an empty value adds no argument.
    service ${opts} update "${prog}" -label "${label}" \
        -maxtime "${maxtime}" -state "${state}"
    if [ $? -eq "${result}" ]; then
        return 0
    fi
    return 1
}

# lu_test(filename, label)
#
# Run the single-component live update scenarios against one service and
# verify that neither its restart counter nor its endpoint changed.
#   $1 - full path to the /proc/service entry
#   $2 - service label
# Prints 'ok' or 'not ok'; prints nothing (and returns non-zero) as soon as
# one scenario yields an unexpected exit status.
#
# The "var=value lu_test_one ..." calls rely on the shell treating prefix
# assignments on a function call as temporary.
lu_test() {
    local label service
    local endpoint_pre endpoint_post
    local restarts_pre restarts_post

    service=$1
    label=$2

    restarts_pre=$(get_value restarts "${service}")
    endpoint_pre=$(get_value endpoint "${service}")

    lu_test_one "${label}" self 0 || return

    # Test live update "prepare only" (not for pm, rs, vfs and vm)
    case "${label}" in
    pm | rs | vfs | vm) ;;
    *)
        lu_opts="-o" lu_test_one "${label}" self 0 || return
        ;;
    esac

    # Test live update initialization crash
    lu_opts="-x" lu_test_one "${label}" self 200 || return

    # Test live update initialization failure
    if [ "${label}" != "rs" ]; then
        lu_opts="-y" lu_test_one "${label}" self 78 || return
    fi

    # Test live update initialization timeout
    if [ "${label}" != "rs" ]; then
        lu_maxtime="1HZ" lu_opts="-z" lu_test_one "${label}" self 4 || return
    fi

    # Test live update from SEF_LU_STATE_EVAL state
    lu_maxtime="1HZ" lu_state="5" lu_test_one "${label}" self 4 || return

    restarts_post=$(get_value restarts "${service}")
    endpoint_post=$(get_value endpoint "${service}")

    # Make sure endpoint and restarts are preserved
    if [ "${restarts_post}" -eq "${restarts_pre}" ] &&
        [ "${endpoint_post}" -eq "${endpoint_pre}" ]; then
        echo ok
    else
        echo not ok
    fi
}

# multi_lu_test_one(expected, once_index, label...)
#
# Queue a live update ("service -q update self") for every label, then
# trigger them with "service sysctl upd_run" and compare the exit status.
#   $1 - expected exit status of "service sysctl upd_run"
#   $2 - 1-based position of the label that gets the *_once settings
#        (0: none)
#   $3.. - service labels
# Settings come from lu_opts / lu_maxtime / lu_state (defaults: empty, 3HZ,
# 1) and, for the label at <once_index>, from lu_opts_once /
# lu_maxtime_once / lu_state_once, which default to the former. They are
# only read here, never written back.
# Returns 0 on the expected status, 1 on an unexpected status or when the
# retries are exhausted, 2 when queueing an update failed and the expected
# status was not reached.
multi_lu_test_one() {
    local expected once_index labels label
    local ret retry index result
    local opts maxtime state
    local opts_once maxtime_once state_once

    expected=$1
    once_index=$2
    shift 2
    labels="$*"
    ret=1
    retry=0

    opts=${lu_opts:-}
    maxtime=${lu_maxtime:-3HZ}
    state=${lu_state:-1}
    opts_once=${lu_opts_once:-${opts}}
    maxtime_once=${lu_maxtime_once:-${maxtime}}
    state_once=${lu_state_once:-${state}}

    while [ "${ret}" -eq 1 ] && [ "${retry}" -lt "${MAX_MULTI_LU_RETRY}" ]; do
        index=0
        # ${labels}, ${opts} and ${opts_once} are unquoted on purpose:
        # word splitting yields the individual labels / drops empty options.
        for label in ${labels}; do
            index=$((${index} + 1))

            if [ "${index}" -eq "${once_index}" ]; then
                service ${opts_once} -q update self \
                    -label "${label}" \
                    -maxtime "${maxtime_once}" \
                    -state "${state_once}" || ret=2
            else
                service ${opts} -q update self \
                    -label "${label}" \
                    -maxtime "${maxtime}" \
                    -state "${state}" || ret=2
            fi
        done
        service sysctl upd_run
        result=$?

        # We may experience transient failures as a result of services
        # trying to talk to each other while being prepared for the
        # live update.  In that case we get result code 4.  If that is
        # not the result code we expected, try again for a limited
        # number of times.
        if [ "${result}" -eq "${expected}" ]; then
            ret=0
        elif [ "${result}" -ne 4 ]; then
            break
        fi
        retry=$((${retry} + 1))
    done

    return "${ret}"
}

# multi_lu_test(label...)
#
# Run the multicomponent live update scenarios for the given labels.
# Returns 0 when every scenario produced the expected status, 1 otherwise.
#
# As in lu_test, the *_once settings are passed as one-shot prefix
# assignments on each call.
multi_lu_test() {
    local y_result z_result
    local have_rs labels label

    have_rs=0
    labels="$*"

    # Some of the results depend on whether RS is part of the live update.
    for label in ${labels}; do
        if [ "x${label}" = "xrs" ]; then
            have_rs=1
        fi
    done

    if [ "${have_rs}" -eq 1 ]; then
        y_result=200
        z_result=200
    else
        y_result=78
        z_result=4
    fi

    multi_lu_test_one 0 0 ${labels} || return 1
    lu_opts_once="-x" multi_lu_test_one 200 2 ${labels} || return 1
    lu_opts_once="-y" multi_lu_test_one "${y_result}" 3 ${labels} || return 1
    lu_maxtime_once="1HZ" lu_opts_once="-z" \
        multi_lu_test_one "${z_result}" 2 ${labels} || return 1
    lu_maxtime_once="1HZ" lu_state_once="5" \
        multi_lu_test_one 4 3 ${labels} || return 1

    return 0
}

# multi_lu_test_wrapper(label...)
#
# Announce and run multi_lu_test for the given labels; print a 'not ok'
# line and return 1 when it fails, return 0 otherwise.
multi_lu_test_wrapper() {
    echo "# testing $* :: multicomponent live update+rollback"
    if ! multi_lu_test "$@"; then
        echo "not ok # failed multicomponent live update+rollback"
        return 1
    fi
    return 0
}

#######################################################################
# main()
#######################################################################

# main([filename...])
#
# Run the policy tests for the given /proc/service entries (default: all of
# them). Without arguments, also run the single and multicomponent live
# update tests. Exits 0 on success and 1 on the first failure.
main() {
    local services service label pol
    local service_policies service_flags
    local cleanup result multi_lu_labels

    # Running this test under X11 hangs the X server, so refuse to start
    # when one is found in the process list.
    if ps -ef | grep -v grep | grep -q /usr/X11R7/bin/X; then
        echo "# This test can't be run while a Xserver is running"
        echo "not ok # A Xserver is running"
        exit 1
    fi

    if [ $# -eq 0 ]; then
        services=$(echo /proc/service/*)
    else
        services="$*"
    fi

    # ${services} and ${service_policies} are space-separated lists and are
    # split on purpose.
    for service in ${services}; do
        label=$(basename "${service}")
        service_policies=$(grep policies "${service}" | cut -d: -f2)
        for pol in ${service_policies}; do
            # Check if the supported policy is under test (exact word match,
            # so that a policy name cannot match part of another one).
            case " ${POLICIES} " in
            *" ${pol} "*) ;;
            *) continue ;;
            esac

            echo "# testing ${label} :: ${pol}"
            cleanup=0
            # prepare_service returns non-zero when it cloned the service.
            prepare_service "${service}" "${label}" || cleanup=1
            result=$("pol_${pol}" "${service}" "${label}")
            if [ "x${result}" != "xok" ]; then
                echo "not ok # failed ${label}, ${pol}"
                exit 1
            fi
            if [ "${cleanup}" -eq 1 ]; then
                cleanup_service "${label}"
            fi
        done
    done

    if [ $# -gt 0 ]; then
        echo "ok # partial test for $* successful"
        exit 0
    fi

    multi_lu_labels=""
    for service in ${services}; do
        label=$(basename "${service}")
        service_policies=$(grep policies "${service}" | cut -d: -f2)

        # Only services that list at least one policy are live updated.
        case "${service_policies}" in
        *[a-zA-Z]*) ;;
        *) continue ;;
        esac

        echo "# testing ${label} :: live update+rollback"
        # lu_test runs in a subshell here, so nothing it sets leaks out.
        result=$(lu_test "${service}" "${label}")
        if [ "x${result}" != "xok" ]; then
            echo "not ok # failed ${label}, live update+rollback"
            exit 1
        fi

        # rs and vm are added explicitly to the multicomponent runs below.
        if [ "x${label}" = "xrs" ] || [ "x${label}" = "xvm" ]; then
            continue
        fi

        service_flags=$(get_value flags "${service}")
        case "${service_flags}" in
        *s*) multi_lu_labels="${multi_lu_labels} ${label}" ;;
        esac
    done

    multi_lu_test_wrapper ${multi_lu_labels} || exit 1
    multi_lu_test_wrapper ${multi_lu_labels} vm || exit 1
    multi_lu_test_wrapper ${multi_lu_labels} rs || exit 1
    multi_lu_test_wrapper ${multi_lu_labels} vm rs || exit 1

    echo ok
    exit 0
}

main "$@"