Compare commits

..

2 Commits

Author SHA1 Message Date
Yusuke Kuoka
a4876c5d03 Make EphemeralRunnerController MaxConcurrentReconciles configurable 2024-11-01 01:56:11 +00:00
Yusuke Kuoka
f58dd76763 Make EphemeralRunnerReconciler create runner pods faster and earlier 2024-09-30 05:25:43 +00:00
7 changed files with 154 additions and 32 deletions

View File

@@ -6,7 +6,7 @@ endif
DOCKER_USER ?= $(shell echo ${DOCKER_IMAGE_NAME} | cut -d / -f1) DOCKER_USER ?= $(shell echo ${DOCKER_IMAGE_NAME} | cut -d / -f1)
VERSION ?= dev VERSION ?= dev
COMMIT_SHA = $(shell git rev-parse HEAD) COMMIT_SHA = $(shell git rev-parse HEAD)
RUNNER_VERSION ?= 2.320.0 RUNNER_VERSION ?= 2.319.1
TARGETPLATFORM ?= $(shell arch) TARGETPLATFORM ?= $(shell arch)
RUNNER_NAME ?= ${DOCKER_USER}/actions-runner RUNNER_NAME ?= ${DOCKER_USER}/actions-runner
RUNNER_TAG ?= ${VERSION} RUNNER_TAG ?= ${VERSION}
@@ -310,7 +310,7 @@ github-release: release
# Otherwise we get errors like the below: # Otherwise we get errors like the below:
# Error: failed to install CRD crds/actions.summerwind.dev_runnersets.yaml: CustomResourceDefinition.apiextensions.k8s.io "runnersets.actions.summerwind.dev" is invalid: [spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[containers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property, spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[initContainers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property] # Error: failed to install CRD crds/actions.summerwind.dev_runnersets.yaml: CustomResourceDefinition.apiextensions.k8s.io "runnersets.actions.summerwind.dev" is invalid: [spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[containers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property, spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[initContainers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property]
# #
# Note that controller-gen newer than 0.6.2 is needed due to https://github.com/kubernetes-sigs/controller-tools/issues/448 # Note that controller-gen newer than 0.6.1 is needed due to https://github.com/kubernetes-sigs/controller-tools/issues/448
# Otherwise ObjectMeta embedded in Spec results in empty on the storage. # Otherwise ObjectMeta embedded in Spec results in empty on the storage.
controller-gen: controller-gen:
ifeq (, $(shell which controller-gen)) ifeq (, $(shell which controller-gen))

View File

@@ -178,7 +178,9 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if ephemeralRunner.Status.RunnerId == 0 { if ephemeralRunner.Status.RunnerId == 0 {
log.Info("Creating new ephemeral runner registration and updating status with runner config") log.Info("Creating new ephemeral runner registration and updating status with runner config")
return r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log) if r, err := r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log); r != nil {
return *r, err
}
} }
secret := new(corev1.Secret) secret := new(corev1.Secret)
@@ -189,7 +191,17 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
} }
// create secret if not created // create secret if not created
log.Info("Creating new ephemeral runner secret for jitconfig.") log.Info("Creating new ephemeral runner secret for jitconfig.")
return r.createSecret(ctx, ephemeralRunner, log) if r, err := r.createSecret(ctx, ephemeralRunner, log); r != nil {
return *r, err
}
// Retry to get the secret that was just created.
// Otherwise, even though we want to continue to create the pod,
// it fails due to the missing secret resulting in an invalid pod spec.
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
log.Error(err, "Failed to fetch secret")
return ctrl.Result{}, err
}
} }
pod := new(corev1.Pod) pod := new(corev1.Pod)
@@ -511,12 +523,12 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
// updateStatusWithRunnerConfig fetches runtime configuration needed by the runner // updateStatusWithRunnerConfig fetches runtime configuration needed by the runner
// This method should always set .status.runnerId and .status.runnerJITConfig // This method should always set .status.runnerId and .status.runnerJITConfig
func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
// Runner is not registered with the service. We need to register it first // Runner is not registered with the service. We need to register it first
log.Info("Creating ephemeral runner JIT config") log.Info("Creating ephemeral runner JIT config")
actionsClient, err := r.actionsClientFor(ctx, ephemeralRunner) actionsClient, err := r.actionsClientFor(ctx, ephemeralRunner)
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err)
} }
jitSettings := &actions.RunnerScaleSetJitRunnerSetting{ jitSettings := &actions.RunnerScaleSetJitRunnerSetting{
@@ -534,12 +546,12 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
if err != nil { if err != nil {
actionsError := &actions.ActionsError{} actionsError := &actions.ActionsError{}
if !errors.As(err, &actionsError) { if !errors.As(err, &actionsError) {
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err)
} }
if actionsError.StatusCode != http.StatusConflict || if actionsError.StatusCode != http.StatusConflict ||
!actionsError.IsException("AgentExistsException") { !actionsError.IsException("AgentExistsException") {
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err)
} }
// If the runner with the name we want already exists it means: // If the runner with the name we want already exists it means:
@@ -552,12 +564,12 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name) log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name)
existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name) existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name)
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err)
} }
if existingRunner == nil { if existingRunner == nil {
log.Info("Runner with the same name does not exist, re-queuing the reconciliation") log.Info("Runner with the same name does not exist, re-queuing the reconciliation")
return ctrl.Result{Requeue: true}, nil return &ctrl.Result{Requeue: true}, nil
} }
log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId) log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId)
@@ -565,16 +577,16 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
log.Info("Removing the runner with the same name") log.Info("Removing the runner with the same name")
err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id)) err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id))
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err)
} }
log.Info("Removed the runner with the same name, re-queuing the reconciliation") log.Info("Removed the runner with the same name, re-queuing the reconciliation")
return ctrl.Result{Requeue: true}, nil return &ctrl.Result{Requeue: true}, nil
} }
// TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation? // TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation?
// The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back. // The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back.
return ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err) return &ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err)
} }
log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id) log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id)
@@ -585,11 +597,20 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
}) })
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err)
} }
// We want to continue without a requeue for faster pod creation.
//
// To do so, we update the status in-place, so that both continuing the loop and
// and requeuing and skipping updateStatusWithRunnerConfig in the next loop, will
// have the same effect.
ephemeralRunner.Status.RunnerId = jitConfig.Runner.Id
ephemeralRunner.Status.RunnerName = jitConfig.Runner.Name
ephemeralRunner.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig") log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig")
return ctrl.Result{}, nil return nil, nil
} }
func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) {
@@ -665,21 +686,21 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
log.Info("Creating new secret for ephemeral runner") log.Info("Creating new secret for ephemeral runner")
jitSecret := r.ResourceBuilder.newEphemeralRunnerJitSecret(runner) jitSecret := r.ResourceBuilder.newEphemeralRunnerJitSecret(runner)
if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil { if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err)
} }
log.Info("Created new secret spec for ephemeral runner") log.Info("Created new secret spec for ephemeral runner")
if err := r.Create(ctx, jitSecret); err != nil { if err := r.Create(ctx, jitSecret); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err)
} }
log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name) log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name)
return ctrl.Result{Requeue: true}, nil return nil, nil
} }
// updateRunStatusFromPod is responsible for updating non-exiting statuses. // updateRunStatusFromPod is responsible for updating non-exiting statuses.
@@ -823,12 +844,14 @@ func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context,
} }
// SetupWithManager sets up the controller with the Manager. // SetupWithManager sets up the controller with the Manager.
func (r *EphemeralRunnerReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *EphemeralRunnerReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
return ctrl.NewControllerManagedBy(mgr). return builderWithOptions(
For(&v1alpha1.EphemeralRunner{}). ctrl.NewControllerManagedBy(mgr).
Owns(&corev1.Pod{}). For(&v1alpha1.EphemeralRunner{}).
WithEventFilter(predicate.ResourceVersionChangedPredicate{}). Owns(&corev1.Pod{}).
Complete(r) WithEventFilter(predicate.ResourceVersionChangedPredicate{}),
opts,
).Complete(r)
} }
func runnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus { func runnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus {

View File

@@ -0,0 +1,90 @@
package actionsgithubcom
import (
"fmt"
"os"
"strconv"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/controller"
)
// Options is the optional configuration for the controllers, which can be
// set via command-line flags or environment variables.
type Options struct {
// RunnerMaxConcuncurrentReconciles is the maximum number of concurrent Reconciles which can be run
// by the EphemeralRunnerController.
RunnerMaxConcuncurrentReconciles int
}
// OptionsWithDefault returns the default options.
// This is here to maintain the options and their default values in one place,
// rather than having to correlate those in multiple places.
func OptionsWithDefault() Options {
return Options{
RunnerMaxConcuncurrentReconciles: 1,
}
}
// LoadEnv loads the options from the environment variables.
// This updates the option value only if the environment variable is set.
// If the option is already set (via a command-line flag), the value from the environment variable takes precedence.
func (o *Options) LoadEnv() error {
v, err := o.getEnvInt("RUNNER_MAX_CONCURRENT_RECONCILES")
if err != nil {
return err
}
if v != nil {
o.RunnerMaxConcuncurrentReconciles = *v
}
return nil
}
func (o *Options) getEnvInt(name string) (*int, error) {
s := os.Getenv(name)
if s == "" {
return nil, nil
}
v, err := strconv.Atoi(s)
if err != nil {
return nil, fmt.Errorf("failed to convert %s=%s to int: %w", name, s, err)
}
return &v, nil
}
type Option func(*controller.Options)
// WithMaxConcurrentReconciles sets the maximum number of concurrent Reconciles which can be run.
//
// This is useful to improve the throughput of the controller, but it may also increase the load on the API server and
// the external service (e.g. GitHub API). The default value is 1, as defined by the controller-runtime.
//
// See https://github.com/actions/actions-runner-controller/issues/3021 for more information
// on real-world use cases and the potential impact of this option.
func WithMaxConcurrentReconciles(n int) Option {
return func(b *controller.Options) {
b.MaxConcurrentReconciles = n
}
}
// builderWithOptions applies the given options to the provided builder, if any.
// This is a helper function to avoid the need to import the controller-runtime package in every reconciler source file
// and the command package that creates the controller.
// This is also useful for reducing code duplication around setting controller options in
// multiple reconcilers.
func builderWithOptions(b *builder.Builder, opts []Option) *builder.Builder {
if len(opts) == 0 {
return b
}
var controllerOpts controller.Options
for _, opt := range opts {
opt(&controllerOpts)
}
return b.WithOptions(controllerOpts)
}

11
main.go
View File

@@ -102,6 +102,8 @@ func main() {
autoScalerImagePullSecrets stringSlice autoScalerImagePullSecrets stringSlice
opts = actionsgithubcom.OptionsWithDefault()
commonRunnerLabels commaSeparatedStringSlice commonRunnerLabels commaSeparatedStringSlice
) )
var c github.Config var c github.Config
@@ -136,6 +138,7 @@ func main() {
flag.DurationVar(&defaultScaleDownDelay, "default-scale-down-delay", actionssummerwindnet.DefaultScaleDownDelay, "The approximate delay for a scale down followed by a scale up, used to prevent flapping (down->up->down->... loop)") flag.DurationVar(&defaultScaleDownDelay, "default-scale-down-delay", actionssummerwindnet.DefaultScaleDownDelay, "The approximate delay for a scale down followed by a scale up, used to prevent flapping (down->up->down->... loop)")
flag.IntVar(&port, "port", 9443, "The port to which the admission webhook endpoint should bind") flag.IntVar(&port, "port", 9443, "The port to which the admission webhook endpoint should bind")
flag.DurationVar(&syncPeriod, "sync-period", 1*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled.") flag.DurationVar(&syncPeriod, "sync-period", 1*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled.")
flag.IntVar(&opts.RunnerMaxConcuncurrentReconciles, "runner-max-concurrent-reconciles", opts.RunnerMaxConcuncurrentReconciles, "The maximum number of concurrent reconciles which can be run by the EphemeralRunner controller. Increase this value to improve the throughput of the controller, but it may also increase the load on the API server and the external service (e.g. GitHub API).")
flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited all the runners created by the controller. See https://github.com/actions/actions-runner-controller/issues/321 for more information") flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited all the runners created by the controller. See https://github.com/actions/actions-runner-controller/issues/321 for more information")
flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.") flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.")
flag.StringVar(&watchSingleNamespace, "watch-single-namespace", "", "Restrict to watch for custom resources in a single namespace.") flag.StringVar(&watchSingleNamespace, "watch-single-namespace", "", "Restrict to watch for custom resources in a single namespace.")
@@ -156,6 +159,12 @@ func main() {
} }
c.Log = &log c.Log = &log
if err := opts.LoadEnv(); err != nil {
fmt.Fprintf(os.Stderr, "Error: loading environment variables: %v\n", err)
os.Exit(1)
}
log.Info("Using options", "runner-max-concurrent-reconciles", opts.RunnerMaxConcuncurrentReconciles)
if !autoScalingRunnerSetOnly { if !autoScalingRunnerSetOnly {
ghClient, err = c.NewClient() ghClient, err = c.NewClient()
if err != nil { if err != nil {
@@ -285,7 +294,7 @@ func main() {
Scheme: mgr.GetScheme(), Scheme: mgr.GetScheme(),
ActionsClient: actionsMultiClient, ActionsClient: actionsMultiClient,
ResourceBuilder: rb, ResourceBuilder: rb,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr, actionsgithubcom.WithMaxConcurrentReconciles(opts.RunnerMaxConcuncurrentReconciles)); err != nil {
log.Error(err, "unable to create controller", "controller", "EphemeralRunner") log.Error(err, "unable to create controller", "controller", "EphemeralRunner")
os.Exit(1) os.Exit(1)
} }

View File

@@ -6,8 +6,8 @@ DIND_ROOTLESS_RUNNER_NAME ?= ${DOCKER_USER}/actions-runner-dind-rootless
OS_IMAGE ?= ubuntu-22.04 OS_IMAGE ?= ubuntu-22.04
TARGETPLATFORM ?= $(shell arch) TARGETPLATFORM ?= $(shell arch)
RUNNER_VERSION ?= 2.320.0 RUNNER_VERSION ?= 2.319.1
RUNNER_CONTAINER_HOOKS_VERSION ?= 0.6.2 RUNNER_CONTAINER_HOOKS_VERSION ?= 0.6.1
DOCKER_VERSION ?= 24.0.7 DOCKER_VERSION ?= 24.0.7
# default list of platforms for which multiarch image is built # default list of platforms for which multiarch image is built

View File

@@ -1,2 +1,2 @@
RUNNER_VERSION=2.320.0 RUNNER_VERSION=2.319.1
RUNNER_CONTAINER_HOOKS_VERSION=0.6.2 RUNNER_CONTAINER_HOOKS_VERSION=0.6.1

View File

@@ -36,8 +36,8 @@ var (
testResultCMNamePrefix = "test-result-" testResultCMNamePrefix = "test-result-"
RunnerVersion = "2.320.0" RunnerVersion = "2.319.1"
RunnerContainerHooksVersion = "0.6.2" RunnerContainerHooksVersion = "0.6.1"
) )
// If you're willing to run this test via VS Code "run test" or "debug test", // If you're willing to run this test via VS Code "run test" or "debug test",