Fix runner pod to not stuck in Terminating when runner got deleted before pod scheduling (#2043)

This fixes the said issue that I found while I was running a series of E2E tests to test other features and pull requestes I have recently contributed.
2026-03-19 08:17:41 +08:00 · 2022-11-27 11:13:38 +09:00
parent 877c93c5c3
commit 96a930bfd9
2 changed files with 27 additions and 1 deletions
--- a/controllers/runner_controller.go
+++ b/controllers/runner_controller.go
@@ -207,9 +207,35 @@ func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {

 				if status.State.Terminated != nil {
 					stopped = true
+					break
 				}
 			}
 		}
+
+		if pod.DeletionTimestamp != nil && !pod.DeletionTimestamp.IsZero() && len(pod.Status.ContainerStatuses) == 0 {
+			// This falls into cases where the pod is stuck with pod status like the below:
+			//
+			//   status:
+			//     conditions:
+			//     - lastProbeTime: null
+			//       lastTransitionTime: "2022-11-20T07:58:05Z"
+			//       message: 'binding rejected: running Bind plugin "DefaultBinder": Operation cannot
+			//         be fulfilled on pods/binding "org-runnerdeploy-l579v-qx5p2": pod org-runnerdeploy-l579v-qx5p2
+			//         is being deleted, cannot be assigned to a host'
+			//       reason: SchedulerError
+			//       status: "False"
+			//       type: PodScheduled
+			//     phase: Pending
+			//     qosClass: BestEffort
+			//
+			// ARC usually waits for the registration timeout to elapse when the pod is terminated before getting scheduled onto a node,
+			// assuming there can be a race condition between ARC and Kubernetes where Kubernetes schedules the pod while ARC is deleting the pod,
+			// which may end up with non-gracefully terminating the runner.
+			//
+			// However, Kubernetes seems to not schedule the pod after observing status like the above.
+			// This if-block is therefore needed to prevent ARC from unnecessarily waiting for the registration timeout to happen.
+			stopped = true
+		}
 	}

 	return stopped