feat: HorizontalRunnerAutoscaler Webhook server (#282)

* feat: HorizontalRunnerAutoscaler Webhook server

This introduces a Webhook server that responds to GitHub `check_run`, `pull_request`, and `push` events by scaling up the matched HorizontalRunnerAutoscaler by 1 replica. This allows you to immediately add "resource slack" for future GitHub Actions job runs, without waiting for the next sync period to add the missing runners.

This feature is highly inspired by https://github.com/philips-labs/terraform-aws-github-runner. terraform-aws-github-runner can manage one set of runners per deployment, whereas actions-runner-controller with this feature can manage as many sets of runners as you declare with HorizontalRunnerAutoscaler and RunnerDeployment pairs.

On each GitHub event received, the webhook server queries repository-wide and organizational runners from the cluster and searches for a single target to scale up. The webhook server tries to match HorizontalRunnerAutoscaler.Spec.ScaleUpTriggers[].GitHubEvent.[CheckRun|Push|PullRequest] against the event, and if it finds exactly one matching HRA among the repository-wide runners, that HRA is the scale target. If it finds none, or two or more, it repeats the same matching on the organizational runners.

Changes:

* Fix integration test
* Update manifests
* chart: Add support for github webhook server
* dockerfile: Include github-webhook-server binary
* Do not import unversioned go-github
* Update README
This commit is contained in:
Yusuke Kuoka
2021-02-07 17:37:27 +09:00
committed by GitHub
parent a4350d0fc2
commit ab1c39de57
31 changed files with 1993 additions and 45 deletions

View File

@@ -46,6 +46,8 @@ type HorizontalRunnerAutoscalerReconciler struct {
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
CacheDuration time.Duration
}
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch
@@ -79,13 +81,23 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}
replicas, err := r.computeReplicas(rd, hra)
if err != nil {
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
var replicas *int
log.Error(err, "Could not compute replicas")
replicasFromCache := r.getDesiredReplicasFromCache(hra)
return ctrl.Result{}, err
if replicasFromCache != nil {
replicas = replicasFromCache
} else {
var err error
replicas, err = r.computeReplicas(rd, hra)
if err != nil {
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
log.Error(err, "Could not compute replicas")
return ctrl.Result{}, err
}
}
const defaultReplicas = 1
@@ -93,6 +105,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
now := time.Now()
for _, reservation := range hra.Spec.CapacityReservations {
if reservation.ExpirationTime.Time.After(now) {
newDesiredReplicas += reservation.Replicas
}
}
if hra.Spec.MaxReplicas != nil && *hra.Spec.MaxReplicas < newDesiredReplicas {
newDesiredReplicas = *hra.Spec.MaxReplicas
}
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
if currentDesiredReplicas != newDesiredReplicas {
copy := rd.DeepCopy()
@@ -103,12 +127,12 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
return ctrl.Result{}, err
}
return ctrl.Result{}, err
}
var updated *v1alpha1.HorizontalRunnerAutoscaler
if hra.Status.DesiredReplicas == nil || *hra.Status.DesiredReplicas != *replicas {
updated := hra.DeepCopy()
updated = hra.DeepCopy()
if (hra.Status.DesiredReplicas == nil && *replicas > 1) ||
(hra.Status.DesiredReplicas != nil && *replicas > *hra.Status.DesiredReplicas) {
@@ -117,7 +141,37 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
}
updated.Status.DesiredReplicas = replicas
}
if replicasFromCache == nil {
if updated == nil {
updated = hra.DeepCopy()
}
var cacheEntries []v1alpha1.CacheEntry
for _, ent := range updated.Status.CacheEntries {
if ent.ExpirationTime.Before(&metav1.Time{Time: now}) {
cacheEntries = append(cacheEntries, ent)
}
}
var cacheDuration time.Duration
if r.CacheDuration > 0 {
cacheDuration = r.CacheDuration
} else {
cacheDuration = 10 * time.Minute
}
updated.Status.CacheEntries = append(updated.Status.CacheEntries, v1alpha1.CacheEntry{
Key: v1alpha1.CacheEntryKeyDesiredReplicas,
Value: *replicas,
ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)},
})
}
if updated != nil {
if err := r.Status().Update(ctx, updated); err != nil {
log.Error(err, "Failed to update horizontalrunnerautoscaler status")