Fix runners to do their best to gracefully stop on pod eviction (#1759)

Ref #1535
Ref #1581

Signed-off-by: Yusuke Kuoka <ykuoka@gmail.com>
This commit is contained in:
Yusuke Kuoka
2022-11-01 20:30:10 +09:00
committed by GitHub
parent 332548093a
commit c74ad6195f
30 changed files with 757 additions and 301 deletions

33
test/startup/assets/config.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
export LIGHTGREEN='\e[0;32m'
export LIGHTRED='\e[0;31m'
export WHITE='\e[0;97m'
export RESET='\e[0m'
log(){
printf "\t${WHITE}$@${RESET}\n" 2>&1
}
success(){
printf "\t${LIGHTGREEN}$@${RESET}\n" 2>&1
}
error(){
printf "\t${LIGHTRED}$@${RESET}\n" 2>&1
}
success "I'm configured normally"
# Condition for should_retry_configuring test
if [ -z "${FAIL_RUNNER_CONFIG_SETUP}" ]; then
touch .runner
fi
echo "$@" > runner_config
success "created a dummy config file"
# adding a counter to see how many times we've gone through the configuration step
count=`cat counter 2>/dev/null|| echo "0"`
count=$((count + 1))
echo ${count} > counter

18
test/startup/assets/logging.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
export LIGHTGREEN='\e[0;32m'
export LIGHTRED='\e[0;31m'
export WHITE='\e[0;97m'
export RESET='\e[0m'
log(){
printf "${WHITE}$@${RESET}\n" 2>&1
}
success(){
printf "${LIGHTGREEN}$@${RESET}\n" 2>&1
}
error(){
printf "${LIGHTRED}$@${RESET}\n" 2>&1
}

28
test/startup/assets/run.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
export LIGHTGREEN='\e[0;32m'
export LIGHTRED='\e[0;31m'
export WHITE='\e[0;97m'
export RESET='\e[0m'
log(){
printf "\t${WHITE}$@${RESET}\n" 2>&1
}
success(){
printf "\t${LIGHTGREEN}$@${RESET}\n" 2>&1
}
error(){
printf "\t${LIGHTRED}$@${RESET}\n" 2>&1
exit 1
}
log "Dumping set runner arguments"
echo "$@" > runner_args
success "Pretending to run service..."
touch run_sh_ran
success "Success"
success ""

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# UNITTEST: retry config
# Will simulate a configuration failure and expects:
# - the configuration step to be run 10 times
# - the startup script to exit with error code 2
# - the run.sh script to never run.
source ../assets/logging.sh
startup_log() {
while read I; do
printf "\tstartup.sh: $I\n"
done
}
log "Setting up test area"
export RUNNER_HOME=testarea
mkdir -p ${RUNNER_HOME}
log "Setting up the test config"
export UNITTEST=true
export FAIL_RUNNER_CONFIG_SETUP=true
export RUNNER_NAME="example_runner_name"
export RUNNER_REPO="myorg/myrepo"
export RUNNER_TOKEN="xxxxxxxxxxxxx"
# run.sh and config.sh get used by the runner's real entrypoint.sh and are part of actions/runner.
# We change symlink dummy versions so the entrypoint.sh can run allowing us to test the real entrypoint.sh
log "Symlink dummy config.sh and run.sh"
ln -s ../../assets/config.sh ${RUNNER_HOME}/config.sh
ln -s ../../assets/run.sh ${RUNNER_HOME}/run.sh
cleanup() {
rm -rf ${RUNNER_HOME}
unset UNITTEST
unset RUNNERHOME
unset RUNNER_NAME
unset RUNNER_REPO
unset RUNNER_TOKEN
unset FAIL_RUNNER_CONFIG_SETUP
}
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT
log "Running the startup script"
log ""
# Run the runner startup script which as a final step runs this
# unit tests run.sh as it was symlinked
../../../runner/startup.sh 2> >(startup_log)
if [ "$?" != "2" ]; then
error "========================================="
error "FAIL | Configuration should have thrown an error"
exit 1
fi
success "PASS | Entrypoint didn't complete successfully"
log "Checking the counter, should have 10 iterations"
count=`cat ${RUNNER_HOME}/counter || "notfound"`
if [ "${count}" != "10" ]; then
error "============================================="
error "FAIL | The retry loop should have done 10 iterations"
exit 1
fi
success "PASS | Retry loop went up to 10"
log "Checking that run.sh never ran"
if [ -f ${RUNNER_HOME}/run_sh_ran ]; then
error "================================================================="
error "FAIL | run.sh was invoked, entrypoint.sh should have failed before that."
exit 1
fi
success "PASS | run.sh never ran"
success
success "==========================="
success "Test completed successfully"
exit 0

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env bash
# UNITTEST: should work as non ephemeral
# Will simulate a scenario where ephemeral=false. expects:
# - the configuration step to be run exactly once
# - the startup script to exit with no error
# - the run.sh script to run without the --once flag
source ../assets/logging.sh
startup_log() {
while read I; do
printf "\tstartup.sh: $I\n"
done
}
log "Setting up test area"
export RUNNER_HOME=testarea
mkdir -p ${RUNNER_HOME}
log "Setting up the test"
export UNITTEST=true
export RUNNER_NAME="example_runner_name"
export RUNNER_REPO="myorg/myrepo"
export RUNNER_TOKEN="xxxxxxxxxxxxx"
export RUNNER_EPHEMERAL=false
# run.sh and config.sh get used by the runner's real entrypoint.sh and are part of actions/runner.
# We change symlink dummy versions so the entrypoint.sh can run allowing us to test the real entrypoint.sh
log "Symlink dummy config.sh and run.sh"
ln -s ../../assets/config.sh ${RUNNER_HOME}/config.sh
ln -s ../../assets/run.sh ${RUNNER_HOME}/run.sh
cleanup() {
rm -rf ${RUNNER_HOME}
unset UNITTEST
unset RUNNERHOME
unset RUNNER_NAME
unset RUNNER_REPO
unset RUNNER_TOKEN
unset RUNNER_EPHEMERAL
}
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT
log "Running the startup script"
log ""
# Run the runner entrypstartupoint script which as a final step runs this
# unit tests run.sh as it was symlinked
../../../runner/startup.sh 2> >(startup_log)
if [ "$?" != "0" ]; then
error "==========================================="
error "FAIL | Startup script did not exit successfully"
exit 1
fi
log "Testing if we went through the configuration step only once"
count=`cat ${RUNNER_HOME}/counter || echo "not_found"`
if [ ${count} != "1" ]; then
error "==============================================="
error "FAIL | The configuration step was not run exactly once"
exit 1
fi
success "PASS | The configuration ran ${count} time(s)"
log "Testing if run.sh ran"
if [ ! -f "${RUNNER_HOME}/run_sh_ran" ]; then
error "=============================="
error "FAIL | The runner service has not run"
exit 1
fi
success "PASS | run.sh ran"
success ""
success "==========================="
success "Test completed successfully"

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
# UNITTEST: should work normally
# Will simulate a normal execution scenario. expects:
# - the configuration step to be run exactly once
# - the startup script to exit with no error
# - the run.sh script to run with the --once flag activated.
source ../assets/logging.sh
startup_log() {
while read I; do
printf "\startup.sh: $I\n"
done
}
log "Setting up test area"
export RUNNER_HOME=testarea
mkdir -p ${RUNNER_HOME}
log "Setting up the test"
export UNITTEST=true
export RUNNER_NAME="example_runner_name"
export RUNNER_REPO="myorg/myrepo"
export RUNNER_TOKEN="xxxxxxxxxxxxx"
# run.sh and config.sh get used by the runner's real entrypoint.sh and are part of actions/runner.
# We change symlink dummy versions so the entrypoint.sh can run allowing us to test the real entrypoint.sh
log "Symlink dummy config.sh and run.sh"
ln -s ../../assets/config.sh ${RUNNER_HOME}/config.sh
ln -s ../../assets/run.sh ${RUNNER_HOME}/run.sh
cleanup() {
rm -rf ${RUNNER_HOME}
unset UNITTEST
unset RUNNERHOME
unset RUNNER_NAME
unset RUNNER_REPO
unset RUNNER_TOKEN
}
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT
log "Running the startup script"
log ""
# Run the runner startup script which as a final step runs this
# unit tests run.sh as it was symlinked
../../../runner/startup.sh 2> >(startup_log)
if [ "$?" != "0" ]; then
error "=========================="
error "Test completed with errors"
exit 1
fi
log "Testing if the configuration step was run only once"
count=`cat ${RUNNER_HOME}/counter || echo "not_found"`
if [ ${count} != "1" ]; then
error "==============================================="
error "FAIL | The configuration step was not run exactly once"
exit 1
fi
success "PASS | The configuration ran ${count} time(s)"
log "Testing if the configuration included the --ephemeral flag"
if grep -q -- '--ephemeral' ${RUNNER_HOME}/runner_config; then
error "==============================================="
error "FAIL | The configuration should not include the --ephemeral flag"
exit 1
fi
success "PASS | The --ephemeral switch was included in the configuration"
log "Testing if run.sh ran"
if [ ! -f "${RUNNER_HOME}/run_sh_ran" ]; then
error "=============================="
error "FAIL | The runner service has not run"
exit 1
fi
success "PASS | run.sh ran"
success ""
success "==========================="
success "Test completed successfully"

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
# UNITTEST: should work disable update
# Will simulate a scneario where disableupdate=true. expects:
# - the configuration step to be run exactly once
# - the startup script to exit with no error
# - the config.sh script to run with the --disableupdate flag set to 'true'.
source ../assets/logging.sh
startup_log() {
while read I; do
printf "\tstartup.sh: $I\n"
done
}
log "Setting up test area"
export RUNNER_HOME=testarea
mkdir -p ${RUNNER_HOME}
log "Setting up the test"
export UNITTEST=true
export RUNNER_NAME="example_runner_name"
export RUNNER_REPO="myorg/myrepo"
export RUNNER_TOKEN="xxxxxxxxxxxxx"
export DISABLE_RUNNER_UPDATE="true"
# run.sh and config.sh get used by the runner's real entrypoint.sh and are part of actions/runner.
# We change symlink dummy versions so the entrypoint.sh can run allowing us to test the real entrypoint.sh
log "Symlink dummy config.sh and run.sh"
ln -s ../../assets/config.sh ${RUNNER_HOME}/config.sh
ln -s ../../assets/run.sh ${RUNNER_HOME}/run.sh
cleanup() {
rm -rf ${RUNNER_HOME}
unset UNITTEST
unset RUNNERHOME
unset RUNNER_NAME
unset RUNNER_REPO
unset RUNNER_TOKEN
}
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT
log "Running the startup script"
log ""
# run.sh and config.sh get used by the runner's real startup.sh and are part of actions/runner.
# We change symlink dummy versions so the startup.sh can run allowing us to test the real entrypoint.sh
../../../runner/startup.sh 2> >(startup_log)
if [ "$?" != "0" ]; then
error "=========================="
error "FAIL | Test completed with errors"
exit 1
fi
log "Testing if the configuration step was run only once"
count=`cat ${RUNNER_HOME}/counter || echo "not_found"`
if [ ${count} != "1" ]; then
error "==============================================="
error "FAIL | The configuration step was not run exactly once"
exit 1
fi
success "PASS | The configuration ran ${count} time(s)"
log "Testing if the configuration included the --disableupdate flag"
if ! grep -q -- '--disableupdate' ${RUNNER_HOME}/runner_config; then
error "==============================================="
error "FAIL | The configuration should not include the --disableupdate flag"
exit 1
fi
success "PASS | The --disableupdate switch was included in the configuration"
log "Testing if run.sh ran"
if [ ! -f "${RUNNER_HOME}/run_sh_ran" ]; then
error "=============================="
error "FAIL | The runner service has not run"
exit 1
fi
success "PASS | run.sh ran"
success ""
success "==========================="
success "Test completed successfully"

36
test/startup/test.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
source assets/logging.sh
for unittest in ./should*; do
log "**********************************"
log " UNIT TEST: ${unittest}"
log "**********************************"
log ""
cd ${unittest}
./test.sh
ret_code=$?
cd ..
log ""
log ""
if [ "${ret_code}" = "0" ]; then
success "Completed: unit test ${unittest}"
else
error "Completed: unit test ${unittest} with errors"
failed="true"
fi
done
if [ -n "${failed:-}" ]; then
error ""
error "*************************************"
error "All unit tests completed, with errors"
error "*************************************"
exit 1
else
success ""
success "***************************************"
success "All unit tests completed with no errors"
success "***************************************"
fi