mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-11 12:06:57 +00:00
Fix runners to do their best to gracefully stop on pod eviction (#1759)
Ref #1535 Ref #1581 Signed-off-by: Yusuke Kuoka <ykuoka@gmail.com>
This commit is contained in:
@@ -190,6 +190,17 @@ func TestNewRunnerPod(t *testing.T) {
|
||||
SecurityContext: &corev1.SecurityContext{
|
||||
Privileged: func(b bool) *bool { return &b }(true),
|
||||
},
|
||||
Lifecycle: &corev1.Lifecycle{
|
||||
PreStop: &corev1.LifecycleHandler{
|
||||
Exec: &corev1.ExecAction{
|
||||
Command: []string{
|
||||
"/bin/sh",
|
||||
"-c",
|
||||
"timeout \"${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}\" /bin/sh -c \"echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'\" >/proc/1/fd/1 2>&1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
RestartPolicy: corev1.RestartPolicyNever,
|
||||
@@ -709,6 +720,17 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
||||
SecurityContext: &corev1.SecurityContext{
|
||||
Privileged: func(b bool) *bool { return &b }(true),
|
||||
},
|
||||
Lifecycle: &corev1.Lifecycle{
|
||||
PreStop: &corev1.LifecycleHandler{
|
||||
Exec: &corev1.ExecAction{
|
||||
Command: []string{
|
||||
"/bin/sh",
|
||||
"-c",
|
||||
"timeout \"${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}\" /bin/sh -c \"echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'\" >/proc/1/fd/1 2>&1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
RestartPolicy: corev1.RestartPolicyNever,
|
||||
|
||||
@@ -1151,6 +1151,27 @@ func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, ru
|
||||
fmt.Sprintf("--registry-mirror=%s", dockerRegistryMirror),
|
||||
)
|
||||
}
|
||||
|
||||
dockerdContainer.Lifecycle = &corev1.Lifecycle{
|
||||
PreStop: &corev1.LifecycleHandler{
|
||||
Exec: &corev1.ExecAction{
|
||||
Command: []string{
|
||||
"/bin/sh", "-c",
|
||||
// A prestop hook can start before the dockerd start up, for example, when the docker init is still provisioning
|
||||
// the TLS key and the cert to be used by dockerd.
|
||||
//
|
||||
// The author of this prestop script encountered issues where the prestophung for ten or more minutes on his cluster.
|
||||
// He realized that the hang happened when a prestop hook is executed while the docker init is provioning the key and cert.
|
||||
// Assuming it's due to that the SIGTERM sent by K8s after the prestop hook was ignored by the docker init at that time,
|
||||
// and it needed to wait until terminationGracePeriodSeconds to elapse before finally killing the container,
|
||||
// he wrote this script so that it tries to delay SIGTERM until dockerd starts and becomes ready for processing the signal.
|
||||
//
|
||||
// Also note that we don't need to run `pkill dockerd` at the end of the prehook script, as SIGTERM is sent by K8s after the prestop had completed.
|
||||
`timeout "${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}" /bin/sh -c "echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'" >/proc/1/fd/1 2>&1`,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if runnerContainerIndex == -1 {
|
||||
|
||||
Reference in New Issue
Block a user