feat: HorizontalRunnerAutoscaler Webhook server (#282)
* feat: HorizontalRunnerAutoscaler Webhook server

This introduces a Webhook server that responds to GitHub `check_run`, `pull_request`, and `push` events by scaling up the matched HorizontalRunnerAutoscaler by 1 replica. This allows you to immediately add "resource slack" for future GitHub Actions job runs, without waiting for the next sync period to add the missing runners.

This feature is highly inspired by https://github.com/philips-labs/terraform-aws-github-runner. terraform-aws-github-runner can manage one set of runners per deployment, whereas actions-runner-controller with this feature can manage as many sets of runners as you declare with HorizontalRunnerAutoscaler and RunnerDeployment pairs.

On each GitHub event received, the webhook server queries repository-wide and organizational runners from the cluster and searches for the single target to scale up. It tries to match HorizontalRunnerAutoscaler.Spec.ScaleUpTriggers[].GitHubEvent.[CheckRun|Push|PullRequest] against the event, and if it finds exactly one HRA, that HRA is the scale target. If none, or two or more, targets are found among repository-wide runners, it repeats the same search over organizational runners.

Changes:

* Fix integration test
* Update manifests
* chart: Add support for github webhook server
* dockerfile: Include github-webhook-server binary
* Do not import unversioned go-github
* Update README
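For illustration, here is a minimal sketch of the target-resolution rule the commit message describes: match the incoming event type against each HorizontalRunnerAutoscaler's scale-up triggers, and treat an HRA as the scale target only when exactly one candidate matches; otherwise fall back from repository-wide to organizational runners. The type and function names below are simplified stand-ins, not the controller's actual code.

package main

import "fmt"

// Simplified stand-ins for the controller's CRD types (hypothetical names).
type ScaleUpTrigger struct {
	GitHubEvent string // "check_run", "pull_request", or "push"
}

type HRA struct {
	Name     string
	Triggers []ScaleUpTrigger
}

// findScaleTarget returns the single HRA whose triggers match the event, or
// nil when zero or several match, in which case the caller repeats the
// search against organizational runners.
func findScaleTarget(hras []HRA, event string) *HRA {
	var matched []*HRA
	for i := range hras {
		for _, t := range hras[i].Triggers {
			if t.GitHubEvent == event {
				matched = append(matched, &hras[i])
				break
			}
		}
	}
	if len(matched) != 1 {
		return nil // none or ambiguous: no scale target at this scope
	}
	return matched[0]
}

func main() {
	repoHRAs := []HRA{{Name: "repo-runners", Triggers: []ScaleUpTrigger{{GitHubEvent: "push"}}}}
	if target := findScaleTarget(repoHRAs, "push"); target != nil {
		fmt.Println("scale up:", target.Name) // prints: scale up: repo-runners
	}
}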
@@ -46,6 +46,8 @@ type HorizontalRunnerAutoscalerReconciler struct {
 	Log      logr.Logger
 	Recorder record.EventRecorder
 	Scheme   *runtime.Scheme
+
+	CacheDuration time.Duration
 }
 
 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch
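The new CacheDuration field makes the controller's desired-replicas cache TTL configurable; as the last hunk below shows, it falls back to 10 minutes when left unset.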
@@ -79,13 +81,23 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 		return ctrl.Result{}, nil
 	}
 
-	replicas, err := r.computeReplicas(rd, hra)
-	if err != nil {
-		r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
-
-		log.Error(err, "Could not compute replicas")
-
-		return ctrl.Result{}, err
+	var replicas *int
+
+	replicasFromCache := r.getDesiredReplicasFromCache(hra)
+
+	if replicasFromCache != nil {
+		replicas = replicasFromCache
+	} else {
+		var err error
+
+		replicas, err = r.computeReplicas(rd, hra)
+		if err != nil {
+			r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
+
+			log.Error(err, "Could not compute replicas")
+
+			return ctrl.Result{}, err
+		}
 	}
 
 	const defaultReplicas = 1
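The hunk above calls r.getDesiredReplicasFromCache(hra) without showing it. Based on the cache-entry write at the bottom of this diff (a key, an integer value, and an expiration time), the read side plausibly looks like the sketch below; this is an assumed reconstruction for illustration, not the function from the commit.

package main

import (
	"fmt"
	"time"
)

// Simplified stand-in for the CacheEntry status type used in this diff.
type CacheEntry struct {
	Key            string
	Value          int
	ExpirationTime time.Time
}

const cacheEntryKeyDesiredReplicas = "desiredReplicas" // assumed key value

// getDesiredReplicasFromCache returns the cached desired-replicas value when
// a matching entry has not yet expired, and nil otherwise so that the caller
// recomputes via computeReplicas.
func getDesiredReplicasFromCache(entries []CacheEntry, now time.Time) *int {
	for _, ent := range entries {
		if ent.Key == cacheEntryKeyDesiredReplicas && ent.ExpirationTime.After(now) {
			v := ent.Value
			return &v
		}
	}
	return nil
}

func main() {
	entries := []CacheEntry{{
		Key:            cacheEntryKeyDesiredReplicas,
		Value:          3,
		ExpirationTime: time.Now().Add(10 * time.Minute),
	}}
	if v := getDesiredReplicasFromCache(entries, time.Now()); v != nil {
		fmt.Println("cached desired replicas:", *v) // prints: cached desired replicas: 3
	}
}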
@@ -93,6 +105,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 	currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
 	newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
 
+	now := time.Now()
+
+	for _, reservation := range hra.Spec.CapacityReservations {
+		if reservation.ExpirationTime.Time.After(now) {
+			newDesiredReplicas += reservation.Replicas
+		}
+	}
+
+	if hra.Spec.MaxReplicas != nil && *hra.Spec.MaxReplicas < newDesiredReplicas {
+		newDesiredReplicas = *hra.Spec.MaxReplicas
+	}
+
 	// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
 	if currentDesiredReplicas != newDesiredReplicas {
 		copy := rd.DeepCopy()
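To make the arithmetic in this hunk concrete: if computeReplicas (or the cache) yields 2 and the HRA carries two capacity reservations of 1 replica each whose ExpirationTime is still in the future, newDesiredReplicas becomes 2 + 1 + 1 = 4; with spec.maxReplicas set to 3, it is then clamped to 3. Reservations that have already expired contribute nothing.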
@@ -103,12 +127,12 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 
 			return ctrl.Result{}, err
 		}
-
-		return ctrl.Result{}, err
 	}
 
+	var updated *v1alpha1.HorizontalRunnerAutoscaler
+
 	if hra.Status.DesiredReplicas == nil || *hra.Status.DesiredReplicas != *replicas {
-		updated := hra.DeepCopy()
+		updated = hra.DeepCopy()
 
 		if (hra.Status.DesiredReplicas == nil && *replicas > 1) ||
 			(hra.Status.DesiredReplicas != nil && *replicas > *hra.Status.DesiredReplicas) {
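The `var updated` refactor in this hunk lets the reconciler accumulate status changes on a single lazily-created deep copy: each later step mutates `updated` only when something actually changed, and one Status().Update call at the end (visible in the final hunk) persists everything, avoiding redundant or conflicting status writes.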
@@ -117,7 +141,37 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 		}
 
 		updated.Status.DesiredReplicas = replicas
+	}
 
+	if replicasFromCache == nil {
+		if updated == nil {
+			updated = hra.DeepCopy()
+		}
+
+		var cacheEntries []v1alpha1.CacheEntry
+
+		for _, ent := range updated.Status.CacheEntries {
+			if ent.ExpirationTime.Before(&metav1.Time{Time: now}) {
+				cacheEntries = append(cacheEntries, ent)
+			}
+		}
+
+		var cacheDuration time.Duration
+
+		if r.CacheDuration > 0 {
+			cacheDuration = r.CacheDuration
+		} else {
+			cacheDuration = 10 * time.Minute
+		}
+
+		updated.Status.CacheEntries = append(updated.Status.CacheEntries, v1alpha1.CacheEntry{
+			Key:            v1alpha1.CacheEntryKeyDesiredReplicas,
+			Value:          *replicas,
+			ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)},
+		})
+	}
+
+	if updated != nil {
 		if err := r.Status().Update(ctx, updated); err != nil {
 			log.Error(err, "Failed to update horizontalrunnerautoscaler status")
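The webhook server itself is not shown in this excerpt. Given that the reconciler above adds every unexpired CapacityReservation to the desired count, the "scale up by 1 replica" behavior plausibly amounts to appending a short-lived reservation to the matched HRA's spec, as in this assumed sketch; the types and the reserveOneReplica helper are illustrative stand-ins, not the shipped handler.

package main

import (
	"fmt"
	"time"
)

// Simplified stand-ins for the spec types the reconciler above consumes.
type CapacityReservation struct {
	Replicas       int
	ExpirationTime time.Time
}

type HRASpec struct {
	CapacityReservations []CapacityReservation
}

// reserveOneReplica appends a reservation that the reconciler counts toward
// the desired replicas until it expires (assumed illustration of the
// webhook's "+1" scale-up).
func reserveOneReplica(spec *HRASpec, ttl time.Duration) {
	spec.CapacityReservations = append(spec.CapacityReservations, CapacityReservation{
		Replicas:       1,
		ExpirationTime: time.Now().Add(ttl),
	})
}

func main() {
	var spec HRASpec
	reserveOneReplica(&spec, 10*time.Minute)
	fmt.Println("pending reservations:", len(spec.CapacityReservations)) // prints: pending reservations: 1
}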