a4220d7774
- test multicomponent live update with and without rs and/or vm; - retry the update a few times if the failure code suggests it might be a transient failure. Change-Id: I5fce256bb418be257353ed21428f672d851d974d
436 lines
10 KiB
Bash
Executable file
436 lines
10 KiB
Bash
Executable file
#!/bin/sh
|
|
|
|
# Idea:
# Iterate over all the /proc/service entries, and
# for each restartability policy call the policy test function if it is
# supported. No accounting of failed / successful tests is done, as a
# failed test can currently provoke cascading effects, so instead we
# fail the test as a whole on the first failure found. Live update tests
# are currently policy-agnostic.
|
|
#
|
|
# If arguments are given, use these instead of all entries found in
# /proc/service. Full paths have to be provided on the command line, like
#   /usr/tests/minix/testrelpol /proc/service/vfs
# to test vfs recovery only.
|
|
#
|
|
# Supported policies have to be in the POLICIES variable, and define a test
|
|
# function.
|
|
#
|
|
# Known limitations:
|
|
# - Currently not all recovery policies are tested
|
|
# - Running this test under X11 hangs the X server
|
|
# - Live update tests do not test rollback situations
|
|
#
|
|
# To add a new policy, you have to do the following:
#   1. Add the policy to the list of active policies with:
#        POLICIES="${POLICIES} <policyname>"
#
#   2. Define the following shell function:
#        pol_<policyname>() {}
#      - it will receive the following parameters:
#        + service filename as $1 : the full path to the proc entry
#        + label as $2            : the service label
#      - it prints 'ok' on success, 'not ok' on failure.
|
|
|
|
# Currently known policies:
|
|
# /* user | endpoint */
|
|
# POL_RESET, /* visible | change */
|
|
# POL_RESTART /* transparent | preserved */
|
|
|
|
#######################################################################
|
|
# Utility functions & global state initializations
|
|
#######################################################################
|
|
# Space-separated list of policy names for which a pol_<name>() test
# function exists; each policy section below appends its own name.
POLICIES=""
# Number of one-second polls wait_for_service() performs before giving up.
MAX_RETRY=7 # so that a single test takes at most 10 seconds
# Upper bound on attempts for one multicomponent live update.
MAX_MULTI_LU_RETRY=3 # how many times should we retry after bad luck?
|
|
|
|
# get_value(key, filename, noerror)
#
# Print the value stored under <key> in the service proc entry <filename>.
#   $1 - key:      grep pattern selecting the "key: value" line(s)
#   $2 - filename: path of the proc entry to read
#   $3 - noerror:  when non-empty, stay silent if the entry cannot be read
# Outputs: the second ':'-separated field of the matching text on stdout
#          (an empty line when nothing matches); a diagnostic on stderr
#          when grep fails with status 2 and noerror is not set.
get_value() {
  local value
  local result

  # Quoted so that an empty or whitespace-containing filename is handed to
  # grep as one argument instead of making grep read stdin.
  value=$(grep "$1" "$2" 2>/dev/null)
  result=$?

  # grep exits 0 on a match and 1 on no match; status 2 is treated as
  # "the entry could not be read", i.e. the service is down.
  if test "$result" -ne 2
  then
    # $value is left unquoted on purpose: word splitting folds the
    # matching text onto a single line before cut extracts the field.
    echo $value | cut -d: -f2
  else
    test -z "$3" && echo "Error: service $2 down" >&2
  fi
}
|
|
|
|
# wait_for_service(filename, restarts)
#
# Wait until the restart counter of a service differs from a known value.
#   $1 - filename: path of the service proc entry
#   $2 - restarts: restart count observed before the fault was triggered
# Returns: 0 as soon as the "restarts" value read from <filename> is
#          non-empty and differs from $2; 1 if that has not happened
#          after MAX_RETRY polls.
wait_for_service() {
  local retry
  local value
  retry=0

  # Arbitrary timeout, found by counting the number of mice crossing
  # the hallway.
  sleep 2
  while test "${retry}" -lt "${MAX_RETRY}"
  do
    sleep 1
    retry=$((retry + 1))
    # The service might momentarily disappear from the list, hence the
    # "noerror" argument and the emptiness check below.
    value=$(get_value restarts "$1" noerror)
    test -n "$value" && test "$value" -ne "$2" && return 0
  done
  return 1
}
|
|
|
|
#######################################################################
|
|
# Service management routines
|
|
#######################################################################
|
|
# prepare_service(filename, label)
#
# Clone a service before testing it, when its flags call for it.
#   $1 - filename: path of the service proc entry
#   $2 - label:    the service label
# Returns: 0 when nothing had to be done; 1 when "service clone" was run,
#          so the caller has to call cleanup_service afterwards.
prepare_service() {
  local label service flags

  service=$1
  label=$2

  flags=$(get_value flags "${service}")

  # Flags without an 'r' need no preparation.
  # NOTE(review): 'r' presumably marks a service that needs a replica and
  # 'R' one that already has it - confirm against the RS documentation.
  case ${flags} in
    *r*) ;;
    *) return 0 ;;
  esac

  # An 'R' flag means no clone is required either.
  case ${flags} in
    *R*) return 0 ;;
  esac

  service clone "${label}"
  return 1
}
|
|
|
|
# cleanup_service(label)
#
# Undo prepare_service by running "service unclone" on the label.
#   $1 - label: the service label
# Returns: the exit status of the "service unclone" command.
cleanup_service() {
  local label

  label=$1

  service unclone "${label}"
}
|
|
|
|
#######################################################################
|
|
# POLICY: restart
|
|
#######################################################################
|
|
POLICIES="${POLICIES} restart"

# pol_restart(filename, label)
#
# Test the "restart" policy: after "service fi <label>" the restart count
# must have increased while the endpoint stays the same.
#   $1 - filename: path of the service proc entry
#   $2 - label:    the service label
# Outputs: 'ok' on success, 'not ok' on failure.
pol_restart() {
  local label service
  local endpoint_pre endpoint_post
  local restarts_pre restarts_post

  service=$1
  label=$2

  restarts_pre=$(get_value restarts "${service}")
  endpoint_pre=$(get_value endpoint "${service}")

  # NOTE(review): "fi" presumably stands for fault injection - confirm.
  service fi "${label}"
  if ! wait_for_service "${service}" "${restarts_pre}"
  then
    echo not ok
    return
  fi

  restarts_post=$(get_value restarts "${service}")
  endpoint_post=$(get_value endpoint "${service}")

  # A missing or non-numeric value makes the comparison fail, which is
  # reported as 'not ok' as well.
  if [ "${restarts_post}" -gt "${restarts_pre}" ] &&
    [ "${endpoint_post}" -eq "${endpoint_pre}" ]
  then
    echo ok
  else
    echo not ok
  fi
}
|
|
|
|
#######################################################################
|
|
# POLICY: reset
|
|
#######################################################################
|
|
POLICIES="${POLICIES} reset"

# pol_reset(filename, label)
#
# Test the "reset" policy: after "service fi <label>" the restart count
# must have increased and the endpoint must have changed.
#   $1 - filename: path of the service proc entry
#   $2 - label:    the service label
# Outputs: 'ok' on success, 'not ok' on failure.
pol_reset() {
  local label service
  local endpoint_pre endpoint_post
  local restarts_pre restarts_post

  service=$1
  label=$2

  restarts_pre=$(get_value restarts "${service}")
  endpoint_pre=$(get_value endpoint "${service}")

  # NOTE(review): "fi" presumably stands for fault injection - confirm.
  service fi "${label}"
  if ! wait_for_service "${service}" "${restarts_pre}"
  then
    echo not ok
    return
  fi

  restarts_post=$(get_value restarts "${service}")
  endpoint_post=$(get_value endpoint "${service}")

  # This policy doesn't guarantee the endpoint to be kept, but there
  # is a slight chance that it will actually stay the same, and fail
  # the test.
  if [ "${restarts_post}" -gt "${restarts_pre}" ] &&
    [ "${endpoint_post}" -ne "${endpoint_pre}" ]
  then
    echo ok
  else
    echo not ok
  fi
}
|
|
|
|
#######################################################################
|
|
# Live update tests
|
|
#######################################################################
|
|
# lu_test_one(label, prog, result)
#
# Run a single "service update" and compare its exit status to the
# expected one.
#   $1 - label:  label of the service to update
#   $2 - prog:   program argument handed to "service update"
#   $3 - result: exit status the update is expected to produce
# Globals: lu_opts, lu_maxtime and lu_state tune the request; callers set
#          them as per-call prefix assignments. Unset or empty values
#          fall back to no extra options, "3HZ" and "1" respectively.
# Returns: 0 if the exit status equals <result>, 1 otherwise (including
#          when <result> is not a valid number).
lu_test_one() {
  local label=$1
  local prog=$2
  local result=$3
  lu_opts=${lu_opts:-}
  lu_maxtime=${lu_maxtime:-3HZ}
  lu_state=${lu_state:-1}

  # ${lu_opts} is deliberately unquoted: when empty it must vanish from
  # the command line instead of becoming an empty argument.
  service ${lu_opts} update "${prog}" -label "${label}" \
    -maxtime "${lu_maxtime}" -state "${lu_state}"
  if [ $? -eq "${result}" ]
  then
    return 0
  fi
  return 1
}
|
|
|
|
# lu_test(filename, label)
#
# Run the single-service live update scenarios against one service and
# check that neither its restart count nor its endpoint changed.
#   $1 - filename: path of the service proc entry
#   $2 - label:    the service label
# Outputs: 'ok' when every scenario returned its expected status and the
#          counters are preserved, 'not ok' when the counters changed.
#          Nothing is printed when a scenario fails; the function then
#          returns lu_test_one's non-zero status.
lu_test() {
  local label service
  local endpoint_pre endpoint_post
  local restarts_pre restarts_post

  service=$1
  label=$2

  restarts_pre=$(get_value restarts "${service}")
  endpoint_pre=$(get_value endpoint "${service}")

  lu_test_one "${label}" self 0 || return

  # Test live update "prepare only"; skipped for pm, rs, vfs and vm.
  # The label is matched exactly so that e.g. "fs" is not taken for "vfs".
  case ${label} in
    pm | rs | vfs | vm) ;;
    *) lu_opts="-o" lu_test_one "${label}" self 0 || return ;;
  esac

  # Test live update initialization crash
  lu_opts="-x" lu_test_one "${label}" self 200 || return

  # Test live update initialization failure (not run for rs)
  if [ "${label}" != "rs" ]
  then
    lu_opts="-y" lu_test_one "${label}" self 78 || return
  fi

  # Test live update initialization timeout (not run for rs)
  if [ "${label}" != "rs" ]
  then
    lu_maxtime="1HZ" lu_opts="-z" lu_test_one "${label}" self 4 || return
  fi

  # Test live update from SEF_LU_STATE_EVAL state
  lu_maxtime="1HZ" lu_state="5" lu_test_one "${label}" self 4 || return

  restarts_post=$(get_value restarts "${service}")
  endpoint_post=$(get_value endpoint "${service}")

  # Make sure endpoint and restarts are preserved
  if [ "${restarts_post}" -eq "${restarts_pre}" ] &&
    [ "${endpoint_post}" -eq "${endpoint_pre}" ]
  then
    echo ok
  else
    echo not ok
  fi
}
|
|
|
|
# multi_lu_test_one(expected, once_index, label...)
#
# Queue a live update for every given label, run them together with
# "service sysctl upd_run", and compare the result to the expected one.
#   $1 - expected:   exit status expected from "service sysctl upd_run"
#   $2 - once_index: 1-based position of the label that is queued with
#                    the lu_*_once settings; 0 selects no label
#   $3... - labels of the services to update together
# Globals: lu_opts, lu_maxtime, lu_state and their *_once variants, set by
#          callers as per-call prefix assignments; MAX_MULTI_LU_RETRY.
# Returns: 0 if upd_run produced <expected>; 1 if it did not (after the
#          allowed retries or on a non-transient result); 2 if queueing
#          an update failed and upd_run then returned an unexpected 4.
multi_lu_test_one() {
  local expected=$1
  local once_index=$2
  shift 2
  local labels="$*"
  local ret=1
  local retry=0
  local index result label

  lu_opts=${lu_opts:-}
  lu_maxtime=${lu_maxtime:-3HZ}
  lu_state=${lu_state:-1}
  # The *_once settings default to the regular ones.
  lu_opts_once=${lu_opts_once:-$lu_opts}
  lu_maxtime_once=${lu_maxtime_once:-$lu_maxtime}
  lu_state_once=${lu_state_once:-$lu_state}

  while [ "$ret" -eq 1 ] && [ "$retry" -lt "${MAX_MULTI_LU_RETRY}" ]
  do
    index=0
    for label in ${labels}
    do
      index=$((index + 1))

      # The option variables are unquoted on purpose so that an empty
      # value adds no argument to the command line.
      if [ "$index" -eq "$once_index" ]
      then
        service ${lu_opts_once} -q update self \
          -label "${label}" \
          -maxtime "${lu_maxtime_once}" \
          -state "${lu_state_once}" || ret=2
      else
        service ${lu_opts} -q update self \
          -label "${label}" \
          -maxtime "${lu_maxtime}" \
          -state "${lu_state}" || ret=2
      fi
    done
    service sysctl upd_run
    result=$?

    # We may experience transient failures as a result of services
    # trying to talk to each other while being prepared for the
    # live update. In that case we get result code 4. If that is
    # not the result code we expected, try again for a limited
    # number of times.
    # NOTE(review): a matching result resets ret to 0 even when queueing
    # an update failed above (ret=2) - confirm this is intended.
    if [ "$result" -eq "$expected" ]
    then
      ret=0
    elif [ "$result" -ne 4 ]
    then
      break
    fi
    retry=$((retry + 1))
  done

  return $ret
}
|
|
|
|
# multi_lu_test(label...)
#
# Run the multicomponent live update scenarios on the given services.
#   $@ - labels of the services updated together
# Returns: 0 if every scenario produced its expected result, 1 as soon
#          as one of them did not.
multi_lu_test() {
  local y_result z_result
  local have_rs=0
  local labels="$*"
  local label

  # Some of the results depend on whether RS is part of the live update.
  for label in ${labels}
  do
    if [ "${label}" = "rs" ]
    then
      have_rs=1
    fi
  done

  if [ "${have_rs}" -eq 1 ]
  then
    y_result=200
    z_result=200
  else
    y_result=78
    z_result=4
  fi

  # Arguments are <expected result> <index of the label that receives the
  # *_once settings> <labels...>. ${labels} is unquoted on purpose so that
  # each label is passed as its own argument.
  multi_lu_test_one 0 0 ${labels} || return 1
  lu_opts_once="-x" multi_lu_test_one 200 2 ${labels} || return 1
  lu_opts_once="-y" multi_lu_test_one "${y_result}" 3 ${labels} || return 1
  lu_maxtime_once="1HZ" lu_opts_once="-z" multi_lu_test_one "${z_result}" 2 ${labels} || return 1
  lu_maxtime_once="1HZ" lu_state_once="5" multi_lu_test_one 4 3 ${labels} || return 1

  return 0
}
|
|
|
|
# multi_lu_test_wrapper(label...)
#
# Announce and run multi_lu_test on the given labels.
#   $@ - labels of the services updated together
# Outputs: a "# testing ..." header line, plus a "not ok ..." line when
#          the test fails.
# Returns: 0 if multi_lu_test succeeded, 1 otherwise.
multi_lu_test_wrapper() {
  # "$*" joins the labels into a single string for the message.
  echo "# testing $* :: multicomponent live update+rollback"
  if ! multi_lu_test "$@"
  then
    echo "not ok # failed multicomponent live update+rollback"
    return 1
  fi
  return 0
}
|
|
|
|
#######################################################################
|
|
# main()
|
|
#######################################################################
|
|
# main([service_path...])
#
# Run the policy tests on the given /proc/service entries, or on all of
# them when no argument is given. A run without arguments also performs
# the single-service and multicomponent live update tests.
#   $@ - optional full paths of the proc entries to test
# Outputs: TAP-like "# testing ..." progress lines followed by a final
#          'ok' or 'not ok # ...' line.
# Exits: 0 on success, 1 on the first failing test or if an X server is
#        running. This function never returns.
main() {
  local services service label service_policies service_flags
  local pol cleanup result multi_lu_labels

  # Running this test under X11 hangs the X server, so refuse to start
  # if one is found.
  if ps -ef | grep -v grep | grep -q /usr/X11R7/bin/X
  then
    echo "# This test can't be run while a Xserver is running"
    echo "not ok # A Xserver is running"
    exit 1
  fi

  if [ $# -eq 0 ]
  then
    services=$(echo /proc/service/*)
  else
    services="$*"
  fi

  for service in ${services}
  do
    label=$(basename "${service}")
    service_policies=$(grep policies "${service}" | cut -d: -f2)
    for pol in ${service_policies}
    do
      # Only run policies that have a test function registered in
      # POLICIES; match whole words so that a policy name which is merely
      # a substring of a supported one is not picked up.
      case " ${POLICIES} " in
        *" ${pol} "*) ;;
        *) continue ;;
      esac

      echo "# testing ${label} :: ${pol}"
      cleanup=0
      prepare_service "${service}" "${label}" || cleanup=1
      result=$("pol_${pol}" "${service}" "${label}")
      if [ "${result}" != "ok" ]
      then
        echo "not ok # failed ${label}, ${pol}"
        exit 1
      fi
      if [ "${cleanup}" -eq 1 ]
      then
        cleanup_service "${label}"
      fi
    done
  done

  # With explicit arguments only the policy tests are run.
  if [ $# -gt 0 ]
  then
    echo "ok # partial test for $* successful"
    exit 0
  fi

  multi_lu_labels=""
  for service in ${services}
  do
    label=$(basename "${service}")
    service_policies=$(grep policies "${service}" | cut -d: -f2)

    # Services whose policy list contains no letters are skipped.
    case ${service_policies} in
      *[a-zA-Z]*) ;;
      *) continue ;;
    esac

    echo "# testing ${label} :: live update+rollback"
    result=$(lu_test "${service}" "${label}")
    if [ "${result}" != "ok" ]
    then
      echo "not ok # failed ${label}, live update+rollback"
      exit 1
    fi

    # rs and vm are not collected here; they are appended explicitly to
    # the multicomponent runs below.
    case ${label} in
      rs | vm) continue ;;
    esac

    # NOTE(review): the 's' flag presumably marks services eligible for
    # multicomponent live update - confirm.
    service_flags=$(get_value flags "${service}")
    case ${service_flags} in
      *s*) multi_lu_labels="${multi_lu_labels} ${label}" ;;
    esac
  done

  # ${multi_lu_labels} is unquoted on purpose: one argument per label.
  multi_lu_test_wrapper ${multi_lu_labels} || exit 1
  multi_lu_test_wrapper ${multi_lu_labels} vm || exit 1
  multi_lu_test_wrapper ${multi_lu_labels} rs || exit 1
  multi_lu_test_wrapper ${multi_lu_labels} vm rs || exit 1

  echo ok
  exit 0
}
|
|
|
|
# Script entry point: run main with the command-line arguments unchanged.
main "$@"
|