mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-11 03:57:01 +00:00
Stop creating registration-only runners on scale-to-zero (#1028)
Resolves #859
This commit is contained in:
@@ -73,7 +73,7 @@ helm upgrade --install --namespace actions-runner-system --create-namespace \
|
|||||||
|
|
||||||
The solution supports both GHEC (GitHub Enterprise Cloud) and GHES (GitHub Enterprise Server) editions as well as regular GitHub. Both PAT (personal access token) and GitHub App authentication works for installations that will be deploying either repository level and / or organization level runners. If you need to deploy enterprise level runners then you are restricted to PAT based authentication as GitHub doesn't support GitHub App based authentication for enterprise runners currently.
|
The solution supports both GHEC (GitHub Enterprise Cloud) and GHES (GitHub Enterprise Server) editions as well as regular GitHub. Both PAT (personal access token) and GitHub App authentication works for installations that will be deploying either repository level and / or organization level runners. If you need to deploy enterprise level runners then you are restricted to PAT based authentication as GitHub doesn't support GitHub App based authentication for enterprise runners currently.
|
||||||
|
|
||||||
If you are deploying this solution into a GHES environment then you will need version >= [3.0.0](https://docs.github.com/en/enterprise-server@3.0/admin/release-notes) as a minimum, in order to use all the features of actions-runner-controller >= [3.3.0](https://docs.github.com/en/enterprise-server@3.3/admin/release-notes) is required.
|
If you are deploying this solution into a GHES environment then you will need to be running version >= [3.3.0](https://docs.github.com/en/enterprise-server@3.3/admin/release-notes).
|
||||||
|
|
||||||
When deploying the solution for a GHES environment you need to provide an additional environment variable as part of the controller deployment:
|
When deploying the solution for a GHES environment you need to provide an additional environment variable as part of the controller deployment:
|
||||||
|
|
||||||
@@ -730,8 +730,6 @@ spec:
|
|||||||
|
|
||||||
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
||||||
|
|
||||||
_Note: The controller creates a "registration-only" runner per RunnerReplicaSet when it is being scaled to zero and retains it until there are one or more runners available. This is a deprecated feature for GitHub Cloud as "registration-only" runners are no longer needed due to GitHub changing their runner [routing logic](https://docs.github.com/en/enterprise-cloud@latest/actions/hosting-your-own-runners/using-self-hosted-runners-in-a-workflow#routing-precedence-for-self-hosted-runners) to no longer fail a workflow run if it targets a runner label that there are no registered runners for._
|
|
||||||
|
|
||||||
The regular `RunnerDeployment` `replicas:` attribute as well as the `HorizontalRunnerAutoscaler` `minReplicas:` attribute supports being set to 0.
|
The regular `RunnerDeployment` `replicas:` attribute as well as the `HorizontalRunnerAutoscaler` `minReplicas:` attribute supports being set to 0.
|
||||||
|
|
||||||
The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` kind. To scale from 0 whilst still being able to provision runners as jobs are queued we must use the `HorizontalRunnerAutoscaler` with only certain scaling configurations, only the below configurations support scaling from 0 whilst also being able to provision runners as jobs are queued:
|
The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` kind. To scale from 0 whilst still being able to provision runners as jobs are queued we must use the `HorizontalRunnerAutoscaler` with only certain scaling configurations, only the below configurations support scaling from 0 whilst also being able to provision runners as jobs are queued:
|
||||||
@@ -745,6 +743,8 @@ The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` ki
|
|||||||
|
|
||||||
If a HorizontalRunnerAutoscaler is configured with a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` then be aware that the controller will check the primary metric of `PercentageRunnersBusy` first and will only use the secondary metric to calculate the desired replica count if the primary metric returns 0 desired replicas.
|
If a HorizontalRunnerAutoscaler is configured with a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` then be aware that the controller will check the primary metric of `PercentageRunnersBusy` first and will only use the secondary metric to calculate the desired replica count if the primary metric returns 0 desired replicas.
|
||||||
|
|
||||||
|
Webhook-based autoscaling is the best option as it is relatively easy to configure and also it can scale scale quickly.
|
||||||
|
|
||||||
#### Scheduled Overrides
|
#### Scheduled Overrides
|
||||||
|
|
||||||
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
||||||
|
|||||||
@@ -117,6 +117,17 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
desired = 1
|
desired = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove this registration runner cleanup later (v0.23.0 or v0.24.0)
|
||||||
|
//
|
||||||
|
// We had to have a registration-only runner to support scale-from-zero before.
|
||||||
|
// But since Sep 2021 Actions update on GitHub Cloud and GHES 3.3, it is unneceesary.
|
||||||
|
// See the below issues for more contexts:
|
||||||
|
// https://github.com/actions-runner-controller/actions-runner-controller/issues/516
|
||||||
|
// https://github.com/actions-runner-controller/actions-runner-controller/issues/859
|
||||||
|
//
|
||||||
|
// In the below block, we have a logic to remove existing registration-only runners as unnecessary.
|
||||||
|
// This logic is introduced since actions-runner-controller 0.21.0 and probably last one or two minor releases
|
||||||
|
// so that actions-runner-controller instance in everyone's cluster won't leave dangling registration-only runners.
|
||||||
registrationOnlyRunnerNsName := req.NamespacedName
|
registrationOnlyRunnerNsName := req.NamespacedName
|
||||||
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
|
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
|
||||||
registrationOnlyRunner := v1alpha1.Runner{}
|
registrationOnlyRunner := v1alpha1.Runner{}
|
||||||
@@ -133,52 +144,11 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
registrationOnlyRunnerExists = true
|
registrationOnlyRunnerExists = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// On scale to zero, we must have fully registered registration-only runner before we start deleting other runners, hence `desired == 0`
|
if registrationOnlyRunnerExists {
|
||||||
// On scale from zero, we must retain the registratoin-only runner until one or more other runners get registered, hence `registrationOnlyRunnerExists && available == 0`.
|
if err := r.Client.Delete(ctx, ®istrationOnlyRunner); err != nil {
|
||||||
// On RunnerReplicaSet creation, it have always 0 replics and no registration-only runner.
|
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
|
||||||
// In this case We don't need to bother creating a registration-only runner which gets deleted soon after we have 1 or more available repolicas,
|
|
||||||
// hence it's not `available == 0`, but `registrationOnlyRunnerExists && available == 0`.
|
|
||||||
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/516
|
|
||||||
registrationOnlyRunnerNeeded := desired == 0 || (registrationOnlyRunnerExists && current == 0)
|
|
||||||
|
|
||||||
if registrationOnlyRunnerNeeded {
|
return ctrl.Result{Requeue: true}, nil
|
||||||
if registrationOnlyRunnerExists {
|
|
||||||
if registrationOnlyRunner.Status.Phase == "" {
|
|
||||||
log.Info("Still waiting for the registration-only runner to be registered")
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// A registration-only runner does not exist and is needed, hence create it.
|
|
||||||
|
|
||||||
runnerForScaleFromToZero, err := r.newRunner(rs)
|
|
||||||
if err != nil {
|
|
||||||
return ctrl.Result{}, fmt.Errorf("failed to create runner for scale from/to zero: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
runnerForScaleFromToZero.ObjectMeta.Name = registrationOnlyRunnerNsName.Name
|
|
||||||
runnerForScaleFromToZero.ObjectMeta.GenerateName = ""
|
|
||||||
runnerForScaleFromToZero.ObjectMeta.Labels = nil
|
|
||||||
metav1.SetMetaDataAnnotation(&runnerForScaleFromToZero.ObjectMeta, annotationKeyRegistrationOnly, "true")
|
|
||||||
|
|
||||||
if err := r.Client.Create(ctx, &runnerForScaleFromToZero); err != nil {
|
|
||||||
log.Error(err, "Failed to create runner for scale from/to zero")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// We can continue to deleting runner pods only after the
|
|
||||||
// registration-only runner gets registered.
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// A registration-only runner exists and is not needed, hence delete it.
|
|
||||||
if registrationOnlyRunnerExists {
|
|
||||||
if err := r.Client.Delete(ctx, ®istrationOnlyRunner); err != nil {
|
|
||||||
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
|
|
||||||
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package controllers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"time"
|
"time"
|
||||||
@@ -257,22 +256,6 @@ var _ = Context("Inside of a new namespace", func() {
|
|||||||
})
|
})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
var regOnly actionsv1alpha1.Runner
|
|
||||||
if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: registrationOnlyRunnerNameFor(name)}, ®Only); err != nil {
|
|
||||||
logf.Log.Info(fmt.Sprintf("Failed getting registration-only runner in test: %v", err))
|
|
||||||
return -1
|
|
||||||
} else {
|
|
||||||
updated := regOnly.DeepCopy()
|
|
||||||
updated.Status.Phase = "Completed"
|
|
||||||
|
|
||||||
if err := k8sClient.Status().Patch(ctx, updated, client.MergeFrom(®Only)); err != nil {
|
|
||||||
logf.Log.Info(fmt.Sprintf("Failed updating registration-only runner in test: %v", err))
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
runnersList.AddOffline([]actionsv1alpha1.Runner{*updated})
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
|
if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
|
||||||
logf.Log.Error(err, "list runners")
|
logf.Log.Error(err, "list runners")
|
||||||
return -1
|
return -1
|
||||||
|
|||||||
@@ -138,8 +138,7 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
log.Error(err, "Failed to patch statefulset", "reason", errors.ReasonForError(err))
|
log.Error(err, "Failed to patch statefulset", "reason", errors.ReasonForError(err))
|
||||||
|
|
||||||
if errors.IsInvalid(err) {
|
if errors.IsInvalid(err) {
|
||||||
// NOTE: This might not be ideal but deal the forbidden error by recreating the statefulset
|
// NOTE: This might not be ideal but is currently required to deal with the forbidden error by recreating the statefulset
|
||||||
// Probably we'd better create a registration-only runner to prevent queued jobs from immediately failing.
|
|
||||||
//
|
//
|
||||||
// 2021-06-13T07:19:52.760Z ERROR actions-runner-controller.runnerset Failed to patch statefulset
|
// 2021-06-13T07:19:52.760Z ERROR actions-runner-controller.runnerset Failed to patch statefulset
|
||||||
// {"runnerset": "default/example-runnerset", "error": "StatefulSet.apps \"example-runnerset\" is invalid: s
|
// {"runnerset": "default/example-runnerset", "error": "StatefulSet.apps \"example-runnerset\" is invalid: s
|
||||||
|
|||||||
Reference in New Issue
Block a user