mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-13 04:56:54 +00:00
fix: force restartPolicy "Never" to prevent runner pods from stucking in Terminating when the container disappeared (#1395)
Ref #1369
This commit is contained in:
@@ -155,7 +155,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,7 +237,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -315,7 +315,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -576,7 +576,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -673,7 +673,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -770,7 +770,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RestartPolicy: corev1.RestartPolicyOnFailure,
|
RestartPolicy: corev1.RestartPolicyNever,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -78,9 +78,7 @@ func (t *PodRunnerTokenInjector) Handle(ctx context.Context, req admission.Reque
|
|||||||
|
|
||||||
updated.Annotations[AnnotationKeyTokenExpirationDate] = ts
|
updated.Annotations[AnnotationKeyTokenExpirationDate] = ts
|
||||||
|
|
||||||
if pod.Spec.RestartPolicy != corev1.RestartPolicyOnFailure {
|
forceRunnerPodRestartPolicyNever(updated)
|
||||||
updated.Spec.RestartPolicy = corev1.RestartPolicyOnFailure
|
|
||||||
}
|
|
||||||
|
|
||||||
buf, err := json.Marshal(updated)
|
buf, err := json.Marshal(updated)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -676,9 +676,7 @@ func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.Ru
|
|||||||
|
|
||||||
pod := template.DeepCopy()
|
pod := template.DeepCopy()
|
||||||
|
|
||||||
if pod.Spec.RestartPolicy == "" {
|
forceRunnerPodRestartPolicyNever(pod)
|
||||||
pod.Spec.RestartPolicy = "OnFailure"
|
|
||||||
}
|
|
||||||
|
|
||||||
if mtu := runnerSpec.DockerMTU; mtu != nil && dockerdInRunner {
|
if mtu := runnerSpec.DockerMTU; mtu != nil && dockerdInRunner {
|
||||||
runnerContainer.Env = append(runnerContainer.Env, []corev1.EnvVar{
|
runnerContainer.Env = append(runnerContainer.Env, []corev1.EnvVar{
|
||||||
|
|||||||
22
controllers/runner_pod.go
Normal file
22
controllers/runner_pod.go
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import corev1 "k8s.io/api/core/v1"
|
||||||
|
|
||||||
|
// Force the runner pod managed by either RunnerDeployment and RunnerSet to have restartPolicy=Never.
|
||||||
|
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369 for more context.
|
||||||
|
//
|
||||||
|
// This is to prevent runner pods from stucking in Terminating when a K8s node disappeared along with the runnr pod and the runner container within it.
|
||||||
|
//
|
||||||
|
// Previously, we used restartPolicy of OnFailure, it turned wrong later, and therefore we now set Never.
|
||||||
|
//
|
||||||
|
// When the restartPolicy is OnFailure and the node disappeared, runner pods on the node seem to stuck in state.terminated==nil, state.waiting!=nil, and state.lastTerminationState!=nil,
|
||||||
|
// and will ever become Running.
|
||||||
|
// It's probably due to that the node onto which the pods have been scheduled will ever come back, hence the container restart attempt swill ever succeed,
|
||||||
|
// the pods stuck waiting for successful restarts forever.
|
||||||
|
//
|
||||||
|
// By forcing runner pods to never restart, we hope there will be no chances of pods being stuck waiting.
|
||||||
|
func forceRunnerPodRestartPolicyNever(pod *corev1.Pod) {
|
||||||
|
if pod.Spec.RestartPolicy != corev1.RestartPolicyNever {
|
||||||
|
pod.Spec.RestartPolicy = corev1.RestartPolicyNever
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user