Compare commits

...

3 Commits

Yusuke Kuoka
898ad3c355 Work-around for offline+busy runners (#993)
Ref #911
2021-12-09 09:31:06 +09:00

renovate[bot]
164a91b18f chore(deps): update quay.io/brancz/kube-rbac-proxy docker tag to v0.11.0 (#745)
* chore(deps): update quay.io/brancz/kube-rbac-proxy docker tag to v0.11.0
* chore(deps): update quay.io/brancz/kube-rbac-proxy make tag to v0.11.0
Co-authored-by: Renovate Bot <bot@renovateapp.com>
Co-authored-by: Callum Tait <15716903+toast-gear@users.noreply.github.com>
2021-12-08 22:53:50 +00:00

Callum Tait
acb004f291 docs: remove RunnerSet limitation (#991)
2021-12-08 22:03:42 +00:00
4 changed files with 24 additions and 14 deletions

Makefile

@@ -17,6 +17,7 @@ RUNNER_FEATURE_FLAG_EPHEMERAL ?=
 KUBECONTEXT ?= kind-acceptance
 CLUSTER ?= acceptance
 CERT_MANAGER_VERSION ?= v1.1.1
+KUBE_RBAC_PROXY_VERSION ?= v0.11.0
 # Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
 CRD_OPTIONS ?= "crd:trivialVersions=true,generateEmbeddedObjectMeta=true"

@@ -156,7 +157,7 @@ acceptance/kind:
 # See https://kind.sigs.k8s.io/docs/user/known-issues/#docker-installed-with-snap
 acceptance/load:
 	kind load docker-image ${NAME}:${VERSION} --name ${CLUSTER}
-	kind load docker-image quay.io/brancz/kube-rbac-proxy:v0.10.0 --name ${CLUSTER}
+	kind load docker-image quay.io/brancz/kube-rbac-proxy:$(KUBE_RBAC_PROXY_VERSION) --name ${CLUSTER}
 	kind load docker-image ${RUNNER_NAME}:${RUNNER_TAG} --name ${CLUSTER}
 	kind load docker-image docker:dind --name ${CLUSTER}
 	kind load docker-image quay.io/jetstack/cert-manager-controller:$(CERT_MANAGER_VERSION) --name ${CLUSTER}

@@ -166,7 +167,7 @@ acceptance/load:
 # Pull the docker images for acceptance
 acceptance/pull:
-	docker pull quay.io/brancz/kube-rbac-proxy:v0.10.0
+	docker pull quay.io/brancz/kube-rbac-proxy:$(KUBE_RBAC_PROXY_VERSION)
 	docker pull docker:dind
 	docker pull quay.io/jetstack/cert-manager-controller:$(CERT_MANAGER_VERSION)
 	docker pull quay.io/jetstack/cert-manager-cainjector:$(CERT_MANAGER_VERSION)
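
The change above replaces the hardcoded `v0.10.0` tag with a single `KUBE_RBAC_PROXY_VERSION ?= v0.11.0` default, so the version is pinned in one place and can still be overridden per invocation (e.g. `make acceptance/pull KUBE_RBAC_PROXY_VERSION=v0.12.0`). A minimal Go sketch of the same resolve-with-default pattern; the `imageRef` helper is illustrative, not part of the project:

```go
package main

import (
	"fmt"
	"os"
)

// imageRef resolves an image reference from an environment override,
// falling back to a pinned default, mirroring what the Makefile gets
// from `KUBE_RBAC_PROXY_VERSION ?= v0.11.0`.
func imageRef(repo, envVar, defaultTag string) string {
	tag := os.Getenv(envVar)
	if tag == "" {
		tag = defaultTag
	}
	return fmt.Sprintf("%s:%s", repo, tag)
}

func main() {
	// Prints quay.io/brancz/kube-rbac-proxy:v0.11.0 unless the variable is set.
	fmt.Println(imageRef("quay.io/brancz/kube-rbac-proxy", "KUBE_RBAC_PROXY_VERSION", "v0.11.0"))
}
```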

README.md

@@ -1116,7 +1116,6 @@ kind: RunnerSet
 metadata:
   name: example
 spec:
-  # NOTE: RunnerSet supports non-ephemeral runners only today
   ephemeral: false
   replicas: 2
   repository: mumoshu/actions-runner-controller-ci

@@ -1162,7 +1161,7 @@ We envision that `RunnerSet` will eventually replace `RunnerDeployment`, as `Run
 **Limitations**

 * For autoscaling the `RunnerSet` kind only supports pull driven scaling or the `workflow_job` event for webhook driven scaling.
-* For autoscaling the `RunnerSet` kind doesn't support the [registration-only runner](#autoscaling-tofrom-0)
+* For autoscaling the `RunnerSet` kind doesn't support the [registration-only runner](#autoscaling-tofrom-0), these are deprecated however and to be [removed](https://github.com/actions-runner-controller/actions-runner-controller/issues/859)
 * A known down-side of relying on `StatefulSet` is that it misses a support for `maxUnavailable`. A `StatefulSet` basically works like `maxUnavailable: 1` in `Deployment`, which means that it can take down only one pod concurrently while doing a rolling-update of pods. Kubernetes 1.22 doesn't support customizing it yet so probably it takes more releases to arrive. See https://github.com/kubernetes/kubernetes/issues/68397 for more information.

 ### Ephemeral Runners
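
The `maxUnavailable` limitation quoted above follows from how StatefulSet rolling updates work: pods are replaced one at a time, highest ordinal first, each waiting for its replacement to become Ready before the next is touched. A toy sketch of that sequencing, assuming a RunnerSet named `example`; this is illustrative only, not controller code:

```go
package main

import "fmt"

// rollingUpdate imitates a StatefulSet RollingUpdate: pods are replaced one
// at a time from the highest ordinal down, which behaves like a Deployment
// with maxUnavailable: 1.
func rollingUpdate(name string, replicas int) {
	for ordinal := replicas - 1; ordinal >= 0; ordinal-- {
		fmt.Printf("replacing %s-%d; waiting until it is Ready before moving on\n", name, ordinal)
	}
}

func main() {
	// A RunnerSet with replicas: 2 is updated as example-1, then example-0,
	// so at most one runner is unavailable at any moment.
	rollingUpdate("example", 2)
}
```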

charts/actions-runner-controller/values.yaml

@@ -88,7 +88,7 @@ metrics:
   enabled: true
   image:
     repository: quay.io/brancz/kube-rbac-proxy
-    tag: v0.10.0
+    tag: v0.11.0
   resources:
     {}
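
For context, kube-rbac-proxy (whose tag is bumped above) sits in front of the controller's metrics endpoint and performs Kubernetes RBAC checks before letting a request through. A toy Go stand-in follows; the ports and upstream address are assumptions, and where the real proxy runs TokenReview/SubjectAccessReview against the API server, this sketch only checks that some bearer token is present:

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Assumed upstream: the controller serving plain-HTTP metrics locally.
	metrics, err := url.Parse("http://127.0.0.1:8080")
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(metrics)

	// Reject unauthenticated scrapes, forward the rest.
	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		if r.Header.Get("Authorization") == "" {
			http.Error(w, "Unauthorized", http.StatusUnauthorized)
			return
		}
		proxy.ServeHTTP(w, r)
	})

	log.Fatal(http.ListenAndServe(":8443", nil))
}
```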

controllers/runner_controller.go

@@ -422,16 +422,26 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 				"configuredRegistrationTimeout", registrationTimeout,
 			)
 		} else if registrationDidTimeout {
-			log.Info(
-				"Already existing GitHub runner still appears offline . "+
-					"Recreating the pod to see if it resolves the issue. "+
-					"CAUTION: If you see this a lot, you should investigate the root cause. ",
-				"podCreationTimestamp", pod.CreationTimestamp,
-				"currentTime", currentTime,
-				"configuredRegistrationTimeout", registrationTimeout,
-			)
+			if runnerBusy {
+				log.Info(
+					"Timed out while waiting for the runner to be online, but observed that it's busy at the same time. "+
+						"This is a known (unintuitive) behaviour of a runner that is already running a job. Please see https://github.com/actions-runner-controller/actions-runner-controller/issues/911",
+					"podCreationTimestamp", pod.CreationTimestamp,
+					"currentTime", currentTime,
+					"configuredRegistrationTimeout", registrationTimeout,
+				)
+			} else {
+				log.Info(
+					"Already existing GitHub runner still appears offline . "+
+						"Recreating the pod to see if it resolves the issue. "+
+						"CAUTION: If you see this a lot, you should investigate the root cause. ",
+					"podCreationTimestamp", pod.CreationTimestamp,
+					"currentTime", currentTime,
+					"configuredRegistrationTimeout", registrationTimeout,
+				)

-			restart = true
+				restart = true
+			}
 		} else {
 			log.V(1).Info(
 				"Runner pod exists but the GitHub runner appears to be still offline. Waiting for runner to get online ...",