Stop creating registration-only runners on scale-to-zero (#1028)

Resolves #859
This commit is contained in:
Yusuke Kuoka
2022-01-07 09:56:21 +09:00
committed by GitHub
parent 02679ac1d8
commit 01301d3ce8
4 changed files with 19 additions and 67 deletions

View File

@@ -117,6 +117,17 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
desired = 1
}
// TODO: remove this registration runner cleanup later (v0.23.0 or v0.24.0)
//
// We had to have a registration-only runner to support scale-from-zero before.
// But since Sep 2021 Actions update on GitHub Cloud and GHES 3.3, it is unneceesary.
// See the below issues for more contexts:
// https://github.com/actions-runner-controller/actions-runner-controller/issues/516
// https://github.com/actions-runner-controller/actions-runner-controller/issues/859
//
// In the below block, we have a logic to remove existing registration-only runners as unnecessary.
// This logic is introduced since actions-runner-controller 0.21.0 and probably last one or two minor releases
// so that actions-runner-controller instance in everyone's cluster won't leave dangling registration-only runners.
registrationOnlyRunnerNsName := req.NamespacedName
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
registrationOnlyRunner := v1alpha1.Runner{}
@@ -133,52 +144,11 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
registrationOnlyRunnerExists = true
}
// On scale to zero, we must have fully registered registration-only runner before we start deleting other runners, hence `desired == 0`
// On scale from zero, we must retain the registratoin-only runner until one or more other runners get registered, hence `registrationOnlyRunnerExists && available == 0`.
// On RunnerReplicaSet creation, it have always 0 replics and no registration-only runner.
// In this case We don't need to bother creating a registration-only runner which gets deleted soon after we have 1 or more available repolicas,
// hence it's not `available == 0`, but `registrationOnlyRunnerExists && available == 0`.
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/516
registrationOnlyRunnerNeeded := desired == 0 || (registrationOnlyRunnerExists && current == 0)
if registrationOnlyRunnerExists {
if err := r.Client.Delete(ctx, &registrationOnlyRunner); err != nil {
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
if registrationOnlyRunnerNeeded {
if registrationOnlyRunnerExists {
if registrationOnlyRunner.Status.Phase == "" {
log.Info("Still waiting for the registration-only runner to be registered")
return ctrl.Result{}, nil
}
} else {
// A registration-only runner does not exist and is needed, hence create it.
runnerForScaleFromToZero, err := r.newRunner(rs)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create runner for scale from/to zero: %v", err)
}
runnerForScaleFromToZero.ObjectMeta.Name = registrationOnlyRunnerNsName.Name
runnerForScaleFromToZero.ObjectMeta.GenerateName = ""
runnerForScaleFromToZero.ObjectMeta.Labels = nil
metav1.SetMetaDataAnnotation(&runnerForScaleFromToZero.ObjectMeta, annotationKeyRegistrationOnly, "true")
if err := r.Client.Create(ctx, &runnerForScaleFromToZero); err != nil {
log.Error(err, "Failed to create runner for scale from/to zero")
return ctrl.Result{}, err
}
// We can continue to deleting runner pods only after the
// registration-only runner gets registered.
return ctrl.Result{}, nil
}
} else {
// A registration-only runner exists and is not needed, hence delete it.
if registrationOnlyRunnerExists {
if err := r.Client.Delete(ctx, &registrationOnlyRunner); err != nil {
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
return ctrl.Result{Requeue: true}, nil
}
return ctrl.Result{Requeue: true}, nil
}
}

View File

@@ -2,7 +2,6 @@ package controllers
import (
"context"
"fmt"
"math/rand"
"net/http/httptest"
"time"
@@ -257,22 +256,6 @@ var _ = Context("Inside of a new namespace", func() {
})
Expect(err).ToNot(HaveOccurred())
var regOnly actionsv1alpha1.Runner
if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: registrationOnlyRunnerNameFor(name)}, &regOnly); err != nil {
logf.Log.Info(fmt.Sprintf("Failed getting registration-only runner in test: %v", err))
return -1
} else {
updated := regOnly.DeepCopy()
updated.Status.Phase = "Completed"
if err := k8sClient.Status().Patch(ctx, updated, client.MergeFrom(&regOnly)); err != nil {
logf.Log.Info(fmt.Sprintf("Failed updating registration-only runner in test: %v", err))
return -1
}
runnersList.AddOffline([]actionsv1alpha1.Runner{*updated})
}
if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
logf.Log.Error(err, "list runners")
return -1

View File

@@ -138,8 +138,7 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
log.Error(err, "Failed to patch statefulset", "reason", errors.ReasonForError(err))
if errors.IsInvalid(err) {
// NOTE: This might not be ideal but deal the forbidden error by recreating the statefulset
// Probably we'd better create a registration-only runner to prevent queued jobs from immediately failing.
// NOTE: This might not be ideal but is currently required to deal with the forbidden error by recreating the statefulset
//
// 2021-06-13T07:19:52.760Z ERROR actions-runner-controller.runnerset Failed to patch statefulset
// {"runnerset": "default/example-runnerset", "error": "StatefulSet.apps \"example-runnerset\" is invalid: s