minix/minix/tests/testrelpol.sh
David van Moolenbroek a4220d7774 tests: extend multicomponent live update test
- test multicomponent live update with and without rs and/or vm;
- retry the update a few times if the failure code suggests it might
  be a transient failure.

Change-Id: I5fce256bb418be257353ed21428f672d851d974d
2015-09-17 14:11:48 +00:00

436 lines
10 KiB
Bash
Executable file

#!/bin/sh
# Idea:
# Iterate over all the /proc/service entries, and
# for each restartability policy call the policy test function if it is
# supported. No accounting of failed / successful tests is done, as a
# failed test can currently provoke cascading effects, so instead we
# fail the test as a whole on the first failure found. Live update tests
# are currently policy-agnostic.
#
# If arguments are given, use this instead of all entries found in
# /proc/service. Full paths have to be provided on the command line, like
# /usr/tests/minix/testrelpol /proc/service/vfs
# to test vfs recovery only.
#
# Supported policies have to be in the POLICIES variable, and define a test
# function.
#
# Known limitations:
# - Currently not all recovery policies are tested
# - Running this test under X11 hangs the X server
# - Live update tests do not test rollback situations
#
# To add a new policy, you have to do the following:
# 1. Add the policy into the active policies array by:
# POLICIES="${POLICIES} <policyname>"
#
# 2. define the following shell function:
# pol_<policyname>() {}
# - it will receive the following parameters:
# + service filename as $1 : the full path to the proc entry
# + label as $2 : the service label
# - which prints 'ok' on success, 'not ok' on failure.
# Currently known policies:
# /* user | endpoint */
# POL_RESET, /* visible | change */
# POL_RESTART /* transparent | preserved */
#######################################################################
# Utility functions & global state initializations
#######################################################################
# Space-separated list of the policies under test. It starts out empty;
# every policy section further down appends its own name.
POLICIES=
# Number of one-second polls done by wait_for_service(), chosen so that a
# single test takes at most 10 seconds.
MAX_RETRY=7
# How many times should a multicomponent live update be retried after bad
# luck (a transient failure)?
MAX_MULTI_LU_RETRY=3
# get_value(key, filename, noerror)
# Print the value recorded for <key> in the service file <filename>.
#   $1 key      : pattern handed to grep to select the line(s)
#   $2 filename : full path of the file to read
#   $3 noerror  : optional; any non-empty string silences the error message
# The matching text is cut at ':' and the second field is written to
# stdout (an empty line if nothing matched). When grep itself fails with
# status 2 (e.g. the file cannot be read), nothing is written to stdout and,
# unless <noerror> is given, "Error: service <filename> down" goes to stderr.
# Callers in this file only use the output, never the exit status.
get_value() {
	local value
	local result
	# -e keeps a key that starts with '-' from being taken for an option,
	# and the quotes keep a path containing whitespace in one piece.
	value=$(grep -e "$1" "$2" 2>/dev/null)
	result=$?
	if test $result -ne 2
	then
		# Deliberately unquoted: the shell collapses runs of whitespace
		# (including newlines between several matches) before the cut.
		echo $value | cut -d: -f2
	else
		test -z "$3" && echo "Error: service $2 down" >&2
	fi
}
# wait_for_service(filename, restarts)
# Wait until the "restarts" value read from the service file <filename>
# is available again and differs from <restarts>.
#   $1 filename : full path of the service file to poll
#   $2 restarts : the restart count observed before the service was disturbed
# Returns 0 as soon as a different count is seen, 1 if MAX_RETRY polls went
# by without that happening.
wait_for_service() {
	local attempts=0
	local current
	# Arbitrary head start before the first poll.
	sleep 2
	while [ ${attempts} -lt ${MAX_RETRY} ]
	do
		sleep 1
		attempts=$((${attempts} + 1))
		# The service might momentarily disappear from the list, hence
		# the "noerror" flag and the emptiness check below.
		current=$(get_value restarts $1 noerror)
		if test -n "$current" && test $current -ne $2
		then
			return 0
		fi
	done
	return 1
}
#######################################################################
# Service management routines
#######################################################################
# prepare_service(service_file, label)
# Get service <label> ready for a policy test, based on the "flags" value
# of its service file.
#   $1 service_file : full path to the service file
#   $2 label        : the service label
# If the flags contain 'r' but not 'R', "service clone <label>" is run and
# 1 is returned so that the caller knows it has to call cleanup_service()
# afterwards; in every other case nothing is done and 0 is returned.
# NOTE(review): presumably 'r' marks a service that can be cloned and 'R' one
# that already has a clone - confirm against the flag definitions.
prepare_service() {
	# flags is local too, so that it no longer leaks into the global scope.
	local label service flags
	service=$1
	label=$2
	flags=$(get_value flags "${service}")
	case "${flags}" in
	*R*)
		return 0
		;;
	*r*)
		service clone "${label}"
		return 1
		;;
	esac
	return 0
}
# cleanup_service(label)
# Counterpart of prepare_service(): run "service unclone" on <label>.
# The exit status is the one of the service command.
cleanup_service() {
	service unclone $1
}
#######################################################################
# POLICY: restart
#######################################################################
POLICIES="${POLICIES} restart"
# pol_restart(service_file, label)
# Test the "restart" policy on one service.
#   $1 service_file : full path to the service file
#   $2 label        : the service label
# Prints 'ok' when, after "service fi <label>", the restart count has gone
# up and the endpoint is still the same; prints 'not ok' otherwise.
pol_restart() {
	local svc_file svc_label
	local count_before count_after
	local ep_before ep_after
	svc_file=$1
	svc_label=$2
	# Snapshot of the state before the service is disturbed.
	count_before=$(get_value restarts ${svc_file})
	ep_before=$(get_value endpoint ${svc_file})
	# NOTE(review): "service fi" presumably injects a fault - confirm.
	service fi ${svc_label}
	if wait_for_service ${svc_file} ${count_before}
	then
		count_after=$(get_value restarts ${svc_file})
		ep_after=$(get_value endpoint ${svc_file})
		if test ${count_after} -gt ${count_before} \
		    -a ${ep_after} -eq ${ep_before}
		then
			echo ok
			return
		fi
	fi
	echo not ok
}
#######################################################################
# POLICY: reset
#######################################################################
POLICIES="${POLICIES} reset"
# pol_reset(service_file, label)
# Test the "reset" policy on one service.
#   $1 service_file : full path to the service file
#   $2 label        : the service label
# Prints 'ok' when, after "service fi <label>", the restart count has gone
# up and the endpoint has changed; prints 'not ok' otherwise.
pol_reset() {
	local svc_file svc_label
	local count_before count_after
	local ep_before ep_after
	svc_file=$1
	svc_label=$2
	# Snapshot of the state before the service is disturbed.
	count_before=$(get_value restarts ${svc_file})
	ep_before=$(get_value endpoint ${svc_file})
	# NOTE(review): "service fi" presumably injects a fault - confirm.
	service fi ${svc_label}
	if wait_for_service ${svc_file} ${count_before}
	then
		count_after=$(get_value restarts ${svc_file})
		ep_after=$(get_value endpoint ${svc_file})
		# This policy doesn't guarantee the endpoint to be kept, but
		# there is a slight chance that it will actually stay the same,
		# and fail the test.
		if test ${count_after} -gt ${count_before} \
		    -a ${ep_after} -ne ${ep_before}
		then
			echo ok
			return
		fi
	fi
	echo not ok
}
#######################################################################
# Live update tests
#######################################################################
# lu_test_one(label, prog, expected)
# Run one "service update" on <label> and compare its exit code.
#   $1 label    : the service label
#   $2 prog     : the program argument given to "service update"
#   $3 expected : the exit code the service command should produce
# The variables lu_opts, lu_maxtime and lu_state may be preset by the caller
# to tune the command; they default to "", 3HZ and 1 respectively.
# Returns 1 when the exit code differs from <expected>, 0 otherwise.
lu_test_one() {
	local label=$1
	local prog=$2
	local expected=$3
	local rc
	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}
	service ${lu_opts} update ${prog} -label ${label} -maxtime ${lu_maxtime} -state ${lu_state}
	rc=$?
	[ $rc -ne $expected ] && return 1
	return 0
}
# lu_test(service_file, label)
# Run the single-component live update tests on one service.
#   $1 service_file : full path to the service file
#   $2 label        : the service label
# Prints 'ok' when every live update attempt produced the expected exit
# code and both the restart count and the endpoint are unchanged afterwards,
# 'not ok' when the counters changed. When one of the attempts fails the
# function returns right away without printing a verdict, which the caller
# treats as a failure as well.
# Labels are compared exactly: the exemptions below apply to the services
# pm, rs, vfs and vm only, not to any label that merely is a substring of
# one of these names.
lu_test() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post
	service=$1
	label=$2
	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")
	# Test a plain live update
	lu_test_one "${label}" self 0 || return
	# Test live update "prepare only"
	case "${label}" in
	pm|rs|vfs|vm)
		;;
	*)
		lu_opts="-o" lu_test_one "${label}" self 0 || return
		;;
	esac
	# Test live update initialization crash
	lu_opts="-x" lu_test_one "${label}" self 200 || return
	if [ "x${label}" != "xrs" ]
	then
		# Test live update initialization failure
		lu_opts="-y" lu_test_one "${label}" self 78 || return
		# Test live update initialization timeout
		lu_maxtime="1HZ" lu_opts="-z" lu_test_one "${label}" self 4 || return
	fi
	# Test live update from SEF_LU_STATE_EVAL state
	lu_maxtime="1HZ" lu_state="5" lu_test_one "${label}" self 4 || return
	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")
	# Make sure endpoint and restarts are preserved. The values are left
	# unquoted on purpose: a missing value then makes the test itself fail.
	if [ ${restarts_post} -eq ${restarts_pre} \
	    -a ${endpoint_post} -eq ${endpoint_pre} ]
	then
		echo ok
	else
		echo not ok
	fi
}
# multi_lu_test_one(expected, once_index, label...)
# Queue a live update ("service ... -q update self") for every given label,
# run the whole batch with "service sysctl upd_run" and check its exit code.
#   $1 expected   : exit code expected from "service sysctl upd_run"
#   $2 once_index : 1-based position of the label that is queued with the
#                   lu_*_once settings instead of the regular ones (0: none)
#   $3...         : the labels taking part in the update
# lu_opts, lu_maxtime and lu_state default to "", 3HZ and 1; the matching
# lu_opts_once, lu_maxtime_once and lu_state_once default to those values.
# Returns 0 when the expected code was seen, 1 when it was not, and 2 when
# queueing one of the updates failed and the expected code was not seen.
multi_lu_test_one() {
	local expected=$1
	local once_index=$2
	shift 2
	local labels="$*"
	local status=1
	local attempt=0
	local position run_rc
	local cur_opts cur_maxtime cur_state
	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}
	lu_opts_once=${lu_opts_once:-$lu_opts}
	lu_maxtime_once=${lu_maxtime_once:-$lu_maxtime}
	lu_state_once=${lu_state_once:-$lu_state}
	while [ $status -eq 1 ] && [ $attempt -lt ${MAX_MULTI_LU_RETRY} ]
	do
		position=0
		for label in ${labels}
		do
			position=$((${position} + 1))
			# Pick the settings for this label first, then issue a
			# single queueing command.
			if [ $position -eq $once_index ]
			then
				cur_opts=${lu_opts_once}
				cur_maxtime=${lu_maxtime_once}
				cur_state=${lu_state_once}
			else
				cur_opts=${lu_opts}
				cur_maxtime=${lu_maxtime}
				cur_state=${lu_state}
			fi
			service ${cur_opts} -q update self \
			    -label ${label} \
			    -maxtime ${cur_maxtime} \
			    -state ${cur_state} || status=2
		done
		service sysctl upd_run
		run_rc=$?
		# We may experience transient failures as a result of services
		# trying to talk to each other while being prepared for the
		# live update. In that case we get result code 4. If that is
		# not the result code we expected, try again for a limited
		# number of times.
		if [ $run_rc -eq $expected ]
		then
			status=0
		elif [ $run_rc -ne 4 ]
		then
			break
		fi
		attempt=$((${attempt} + 1))
	done
	return $status
}
# multi_lu_test(label...)
# Run the series of multicomponent live update scenarios on the given
# labels: a plain update, then one with -x, -y and -z applied to a single
# component, and finally one starting from state 5.
# Returns 0 when every scenario passed, 1 as soon as one of them fails.
multi_lu_test() {
	# Exit codes expected for the -y and -z scenarios when RS is not part
	# of the live update.
	local fail_rc=78
	local timeout_rc=4
	local labels="$*"
	# Some of the results depend on whether RS is part of the live update.
	for label in ${labels}
	do
		if [ "x$label" = "xrs" ]
		then
			fail_rc=200
			timeout_rc=200
		fi
	done
	multi_lu_test_one 0 0 ${labels} &&
	lu_opts_once="-x" multi_lu_test_one 200 2 ${labels} &&
	lu_opts_once="-y" multi_lu_test_one ${fail_rc} 3 ${labels} &&
	lu_maxtime_once="1HZ" lu_opts_once="-z" multi_lu_test_one ${timeout_rc} 2 ${labels} &&
	lu_maxtime_once="1HZ" lu_state_once="5" multi_lu_test_one 4 3 ${labels} ||
	return 1
	return 0
}
# multi_lu_test_wrapper(label...)
# Announce the multicomponent live update test for the given labels, run
# it, and report a failure. Returns 0 on success and 1 on failure.
multi_lu_test_wrapper() {
	echo "# testing $@ :: multicomponent live update+rollback"
	multi_lu_test "$@" && return 0
	echo "not ok # failed multicomponent live update+rollback"
	return 1
}
#######################################################################
# main()
#######################################################################
# main([service_file...])
# Run the whole test: the policy tests, then the single-component live
# update tests and finally the multicomponent live update tests.
#   $@ : optional full paths of service files; when given, only the policy
#        tests of these services are run and the script then exits. Without
#        arguments every entry matching /proc/service/* is tested.
# The function never returns: it exits with status 0 after printing an
# "ok" line, or with status 1 right after the first "not ok" line.
main() {
	local services service label pol
	local service_policies service_flags
	local result cleanup multi_lu_labels
	# If there is a running X server, refuse to run the test at all
	if ps -ef | grep -v grep | grep -q /usr/X11R7/bin/X
	then
		echo "# This test can't be run while a Xserver is running"
		echo "not ok # A Xserver is running"
		exit 1
	fi
	if [ $# -eq 0 ]
	then
		services=$(echo /proc/service/*)
	else
		services="$*"
	fi
	for service in ${services}
	do
		label=$(basename "${service}")
		service_policies=$(grep policies "${service}" | cut -d: -f2)
		for pol in ${service_policies}
		do
			# Check if the policy is under test. The name has to
			# match a whole word of POLICIES: a mere substring (or
			# a regular expression) would send us to a pol_<name>
			# function that does not exist.
			case " ${POLICIES} " in
			*" ${pol} "*)
				;;
			*)
				continue
				;;
			esac
			echo "# testing ${label} :: ${pol}"
			cleanup=0
			# prepare_service returns non-zero when it cloned the
			# service, in which case we have to undo that later.
			prepare_service "${service}" "${label}" || cleanup=1
			result=$(pol_${pol} "${service}" "${label}")
			if [ "x${result}" != "xok" ]
			then
				echo "not ok # failed ${label}, ${pol}"
				exit 1
			fi
			if [ $cleanup -eq 1 ]
			then
				cleanup_service "${label}"
			fi
		done
	done
	# With explicit services on the command line we stop after the
	# policy tests.
	if [ $# -gt 0 ]
	then
		echo "ok # partial test for $@ successful"
		exit 0
	fi
	multi_lu_labels=""
	for service in ${services}
	do
		label=$(basename "${service}")
		service_policies=$(grep policies "${service}" | cut -d: -f2)
		# Only services that list at least one policy are live updated
		if echo "${service_policies}" | grep -q "[a-zA-Z]"
		then
			echo "# testing ${label} :: live update+rollback"
			result=$(lu_test "${service}" "${label}")
			if [ "x${result}" != "xok" ]
			then
				echo "not ok # failed ${label}, live update+rollback"
				exit 1
			fi
			# rs and vm are added explicitly to the multicomponent
			# runs below, so keep them out of the common list.
			if [ "x${label}" = "xrs" -o "x${label}" = "xvm" ]
			then
				continue
			fi
			# Only services whose flags contain 's' take part in
			# the multicomponent live update.
			service_flags=$(get_value flags "${service}")
			case "${service_flags}" in
			*s*)
				multi_lu_labels="${multi_lu_labels} ${label}"
				;;
			esac
		fi
	done
	# multi_lu_labels is left unquoted on purpose: it holds a list of
	# labels that has to be split into separate arguments.
	multi_lu_test_wrapper ${multi_lu_labels} || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} rs || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm rs || exit 1
	echo ok
	exit 0
}
# Entry point: run main() with the script's command-line arguments.
main "$@"