feat: HorizontalRunnerAutoscaler Webhook server (#282)

* feat: HorizontalRunnerAutoscaler Webhook server This introduces a Webhook server that responds to GitHub `check_run`, `pull_request`, and `push` events by scaling up the matched HorizontalRunnerAutoscaler by 1 replica. This allows you to immediately add "resource slack" for future GitHub Actions job runs, without waiting for the next sync period to add the missing runners. This feature is heavily inspired by https://github.com/philips-labs/terraform-aws-github-runner. terraform-aws-github-runner can manage one set of runners per deployment, whereas actions-runner-controller with this feature can manage as many sets of runners as you declare with HorizontalRunnerAutoscaler and RunnerDeployment pairs. On each GitHub event received, the webhook server queries repository-wide and organizational runners from the cluster and searches for the single target to scale up. The webhook server tries to match HorizontalRunnerAutoscaler.Spec.ScaleUpTriggers[].GitHubEvent.[CheckRun|Push|PullRequest] against the event, and if it finds exactly one HRA, that HRA is the scale target. If none or two or more targets are found for repository-wide runners, it does the same on organizational runners. Changes: * Fix integration test * Update manifests * chart: Add support for github webhook server * dockerfile: Include github-webhook-server binary * Do not import unversioned go-github * Update README
2025-12-11 03:57:01 +00:00 · 2021-02-07 17:37:27 +09:00
parent a4350d0fc2
commit ab1c39de57
31 changed files with 1993 additions and 45 deletions
--- a/controllers/horizontalrunnerautoscaler_controller.go
+++ b/controllers/horizontalrunnerautoscaler_controller.go
@@ -46,6 +46,8 @@ type HorizontalRunnerAutoscalerReconciler struct {
 	Log          logr.Logger
 	Recorder     record.EventRecorder
 	Scheme       *runtime.Scheme
+
+	CacheDuration time.Duration
 }

 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch
@@ -79,13 +81,23 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 		return ctrl.Result{}, nil
 	}

-	replicas, err := r.computeReplicas(rd, hra)
-	if err != nil {
-		r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
+	var replicas *int

-		log.Error(err, "Could not compute replicas")
+	replicasFromCache := r.getDesiredReplicasFromCache(hra)

-		return ctrl.Result{}, err
+	if replicasFromCache != nil {
+		replicas = replicasFromCache
+	} else {
+		var err error
+
+		replicas, err = r.computeReplicas(rd, hra)
+		if err != nil {
+			r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
+
+			log.Error(err, "Could not compute replicas")
+
+			return ctrl.Result{}, err
+		}
 	}

 	const defaultReplicas = 1
@@ -93,6 +105,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 	currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
 	newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)

+	now := time.Now()
+
+	for _, reservation := range hra.Spec.CapacityReservations {
+		if reservation.ExpirationTime.Time.After(now) {
+			newDesiredReplicas += reservation.Replicas
+		}
+	}
+
+	if hra.Spec.MaxReplicas != nil && *hra.Spec.MaxReplicas < newDesiredReplicas {
+		newDesiredReplicas = *hra.Spec.MaxReplicas
+	}
+
 	// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
 	if currentDesiredReplicas != newDesiredReplicas {
 		copy := rd.DeepCopy()
@@ -103,12 +127,12 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl

 			return ctrl.Result{}, err
 		}
-
-		return ctrl.Result{}, err
 	}

+	var updated *v1alpha1.HorizontalRunnerAutoscaler
+
 	if hra.Status.DesiredReplicas == nil || *hra.Status.DesiredReplicas != *replicas {
-		updated := hra.DeepCopy()
+		updated = hra.DeepCopy()

 		if (hra.Status.DesiredReplicas == nil && *replicas > 1) ||
 			(hra.Status.DesiredReplicas != nil && *replicas > *hra.Status.DesiredReplicas) {
@@ -117,7 +141,37 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 		}

 		updated.Status.DesiredReplicas = replicas
+	}

+	if replicasFromCache == nil {
+		if updated == nil {
+			updated = hra.DeepCopy()
+		}
+
+		var cacheEntries []v1alpha1.CacheEntry
+
+		for _, ent := range updated.Status.CacheEntries {
+			if ent.ExpirationTime.Before(&metav1.Time{Time: now}) {
+				cacheEntries = append(cacheEntries, ent)
+			}
+		}
+
+		var cacheDuration time.Duration
+
+		if r.CacheDuration > 0 {
+			cacheDuration = r.CacheDuration
+		} else {
+			cacheDuration = 10 * time.Minute
+		}
+
+		updated.Status.CacheEntries = append(updated.Status.CacheEntries, v1alpha1.CacheEntry{
+			Key:            v1alpha1.CacheEntryKeyDesiredReplicas,
+			Value:          *replicas,
+			ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)},
+		})
+	}
+
+	if updated != nil {
 		if err := r.Status().Update(ctx, updated); err != nil {
 			log.Error(err, "Failed to update horizontalrunnerautoscaler status")