feat: Repository-wide RunnerDeployment Autoscaling (#57)

* feat: Repository-wide RunnerDeployment Autoscaling

This adds `maxReplicas` and `minReplicas` to `RunnerDeploymentSpec`. If, and only if, both fields are set, the controller automatically computes and sets the desired `replicas` based on demand.

The number of demanded runner replicas is computed as `queued workflow runs + in_progress workflow runs` for the repository. Support for organizational runners is not included.
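The demanded count is then clamped to the `[minReplicas, maxReplicas]` range. A minimal sketch of that calculation (names are illustrative, not part of the commit):

```go
// desiredReplicas clamps the demanded count (queued + in_progress
// workflow runs) to the configured [min, max] range.
func desiredReplicas(queued, inProgress, min, max int) int {
	demanded := queued + inProgress
	if demanded < min {
		return min
	}
	if demanded > max {
		return max
	}
	return demanded
}
```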

Ref https://github.com/summerwind/actions-runner-controller/issues/10
Author: KUOKA Yusuke
Date: 2020-06-27 17:26:46 +09:00
Committed by: GitHub
Parent: 512cae68a1
Commit: 5bb2694349
12 changed files with 521 additions and 20 deletions


@@ -0,0 +1,92 @@
package controllers
import (
"context"
"fmt"
"strings"

"github.com/summerwind/actions-runner-controller/api/v1alpha1"
)
type NotSupported struct {
}
var _ error = NotSupported{}
func (e NotSupported) Error() string {
return "Autoscaling is currently supported only when spec.repository is set"
}
// determineDesiredReplicas queries GitHub for the repository's workflow runs
// and returns queued + in_progress, clamped to [minReplicas, maxReplicas].
func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) {
if rd.Spec.Replicas != nil {
return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deplomeny with specific replicas")
} else if rd.Spec.MinReplicas == nil {
return nil, fmt.Errorf("runnerdeployment %s/%s is missing minReplicas", rd.Namespace, rd.Name)
} else if rd.Spec.MaxReplicas == nil {
return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name)
}
repoID := rd.Spec.Template.Spec.Repository
if repoID == "" {
return nil, NotSupported{}
}
repo := strings.Split(repoID, "/")
if len(repo) != 2 {
return nil, fmt.Errorf("runnerdeployment %s/%s has invalid repository %q: expected the owner/name format", rd.Namespace, rd.Name, repoID)
}
user, repoName := repo[0], repo[1]
list, _, err := r.GitHubClient.Actions.ListRepositoryWorkflowRuns(context.TODO(), user, repoName, nil)
if err != nil {
return nil, err
}
var total, inProgress, queued, completed, unknown int
for _, run := range list.WorkflowRuns {
total++
// As of May 2020, the API reports only these three statuses.
// See the following for details:
// - https://developer.github.com/v3/actions/workflow-runs/#list-repository-workflow-runs
// - https://developer.github.com/v3/checks/runs/#create-a-check-run
switch run.GetStatus() {
case "completed":
completed++
case "in_progress":
inProgress++
case "queued":
queued++
default:
unknown++
}
}
minReplicas := *rd.Spec.MinReplicas
maxReplicas := *rd.Spec.MaxReplicas
necessaryReplicas := queued + inProgress
var desiredReplicas int
if necessaryReplicas < minReplicas {
desiredReplicas = minReplicas
} else if necessaryReplicas > maxReplicas {
desiredReplicas = maxReplicas
} else {
desiredReplicas = necessaryReplicas
}
// Note: rd is passed by value here, so mutating its status would be lost.
// The deployment's status is updated by the reconciler after the replica
// set is created.
r.Log.V(1).Info(
"Calculated desired replicas",
"computed_replicas_desired", desiredReplicas,
"spec_replicas_min", minReplicas,
"spec_replicas_max", maxReplicas,
"workflow_runs_completed", completed,
"workflow_runs_in_progress", inProgress,
"workflow_runs_queued", queued,
"workflow_runs_unknown", unknown,
)
return &desiredReplicas, nil
}
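One caveat: `ListRepositoryWorkflowRuns` is invoked with `nil` options, so only the first page of workflow runs (30 by default) is counted, which can undercount demand on a busy repository. A paginated tally could look roughly like this (a sketch against the underlying go-github client; the import version path is illustrative):

```go
package autoscaling

import (
	"context"

	"github.com/google/go-github/v33/github" // version path is illustrative
)

// countDemandedRuns tallies queued and in_progress workflow runs across
// every page of results, rather than only the first page.
func countDemandedRuns(ctx context.Context, c *github.Client, owner, repo string) (int, error) {
	opts := &github.ListWorkflowRunsOptions{
		ListOptions: github.ListOptions{PerPage: 100},
	}
	demanded := 0
	for {
		runs, res, err := c.Actions.ListRepositoryWorkflowRuns(ctx, owner, repo, opts)
		if err != nil {
			return 0, err
		}
		for _, run := range runs.WorkflowRuns {
			switch run.GetStatus() {
			case "queued", "in_progress":
				demanded++
			}
		}
		if res.NextPage == 0 {
			break
		}
		opts.Page = res.NextPage
	}
	return demanded, nil
}
```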


@@ -0,0 +1,199 @@
package controllers
import (
"fmt"
"net/http/httptest"
"net/url"
"testing"

"github.com/summerwind/actions-runner-controller/api/v1alpha1"
"github.com/summerwind/actions-runner-controller/github"
"github.com/summerwind/actions-runner-controller/github/fake"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)
func newGithubClient(server *httptest.Server) *github.Client {
client, err := github.NewClientWithAccessToken("token")
if err != nil {
panic(err)
}
baseURL, err := url.Parse(server.URL + "/")
if err != nil {
panic(err)
}
client.Client.BaseURL = baseURL
return client
}
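Note that the helper rewrites the client's `BaseURL` to point at the `httptest` server, so every GitHub API call in the tests below is answered by the fake server; no network access is needed and the `"token"` access token is never validated.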
func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
intPtr := func(v int) *int {
return &v
}
metav1Now := metav1.Now()
testcases := []struct {
repo string
org string
fixed *int
max *int
min *int
sReplicas *int
sTime *metav1.Time
workflowRuns string
want int
err string
}{
// 3 demanded, max at 3
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(3),
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}`,
want: 3,
},
// 2 demanded, max at 3, currently 3, delay scaling down due to grace period
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(3),
sReplicas: intPtr(3),
sTime: &metav1Now,
workflowRuns: `{"total_count": 3, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}`,
want: 3,
},
// 3 demanded, max at 2
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(2),
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}`,
want: 2,
},
// 2 demanded, min at 2
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(3),
workflowRuns: `{"total_count": 3, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}`,
want: 2,
},
// 1 demanded, min at 2
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}`,
want: 2,
},
// 1 demanded, min at 2
{
repo: "test/valid",
min: intPtr(2),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}`,
want: 2,
},
// 1 demanded, min at 1
{
repo: "test/valid",
min: intPtr(1),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}`,
want: 1,
},
// 1 demanded, min at 1
{
repo: "test/valid",
min: intPtr(1),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}`,
want: 1,
},
// fixed at 3
{
repo: "test/valid",
fixed: intPtr(3),
want: 3,
},
// org runner, fixed at 3
{
org: "test",
fixed: intPtr(3),
want: 3,
},
// org runner, 1 demanded, min at 1
{
org: "test",
min: intPtr(1),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}`,
err: "Autoscaling is currently supported only when spec.repository is set",
},
}
for i := range testcases {
tc := testcases[i]
log := zap.New(func(o *zap.Options) {
o.Development = true
})
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = v1alpha1.AddToScheme(scheme)
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
server := fake.NewServer(fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns))
defer server.Close()
client := newGithubClient(server)
r := &RunnerDeploymentReconciler{
Log: log,
GitHubClient: client,
Scheme: scheme,
}
rd := v1alpha1.RunnerDeployment{
TypeMeta: metav1.TypeMeta{},
Spec: v1alpha1.RunnerDeploymentSpec{
Template: v1alpha1.RunnerTemplate{
Spec: v1alpha1.RunnerSpec{
Repository: tc.repo,
},
},
Replicas: tc.fixed,
MaxReplicas: tc.max,
MinReplicas: tc.min,
},
Status: v1alpha1.RunnerDeploymentStatus{
Replicas: tc.sReplicas,
LastSuccessfulScaleOutTime: tc.sTime,
},
}
rs, err := r.newRunnerReplicaSetWithAutoscaling(rd)
if err != nil {
if tc.err == "" {
t.Fatalf("unexpected error: expected none, got %v", err)
} else if err.Error() != tc.err {
t.Fatalf("unexpected error: expected %v, got %v", tc.err, err)
}
return
}
got := rs.Spec.Replicas
if got == nil {
t.Fatalf("unexpected value of rs.Spec.Replicas: nil")
}
if *got != tc.want {
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got)
}
})
}
}


@@ -20,10 +20,12 @@ import (
"context"
"fmt"
"hash/fnv"
"k8s.io/apimachinery/pkg/types"
"sort"
"time"
"github.com/summerwind/actions-runner-controller/github"
"k8s.io/apimachinery/pkg/types"
"github.com/davecgh/go-spew/spew"
"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/runtime"
@@ -47,9 +49,10 @@ const (
// RunnerDeploymentReconciler reconciles a RunnerDeployment object
type RunnerDeploymentReconciler struct {
client.Client
GitHubClient *github.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
}
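Because the reconciler now carries a `GitHubClient`, it must be injected wherever the controller is constructed. A sketch of the wiring in a typical kubebuilder-style `main.go` (`NewClientWithAccessToken` and the reconciler fields are from this commit; the env var and log names are illustrative):

```go
package main

import (
	"os"

	"github.com/summerwind/actions-runner-controller/controllers"
	"github.com/summerwind/actions-runner-controller/github"
	ctrl "sigs.k8s.io/controller-runtime"
)

var setupLog = ctrl.Log.WithName("setup")

func main() {
	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{})
	if err != nil {
		setupLog.Error(err, "unable to start manager")
		os.Exit(1)
	}

	// The reconciler now needs a GitHub client to query workflow runs.
	ghClient, err := github.NewClientWithAccessToken(os.Getenv("GITHUB_TOKEN"))
	if err != nil {
		setupLog.Error(err, "unable to create GitHub client")
		os.Exit(1)
	}

	if err := (&controllers.RunnerDeploymentReconciler{
		Client:       mgr.GetClient(),
		GitHubClient: ghClient,
		Log:          ctrl.Log.WithName("controllers").WithName("RunnerDeployment"),
		Recorder:     mgr.GetEventRecorderFor("runnerdeployment-controller"),
		Scheme:       mgr.GetScheme(),
	}).SetupWithManager(mgr); err != nil {
		setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment")
		os.Exit(1)
	}

	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		setupLog.Error(err, "problem running manager")
		os.Exit(1)
	}
}
```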
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;create;update;patch;delete
@@ -94,15 +97,19 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
oldSets = myRunnerReplicaSets[1:]
}
desiredRS, err := r.newRunnerReplicaSetWithAutoscaling(rd)
if err != nil {
if _, ok := err.(NotSupported); ok {
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetAutoScaleNotSupported", err.Error())
}
log.Error(err, "Could not create runnerreplicaset")
return ctrl.Result{}, err
}
if newestSet == nil {
if err := r.Client.Create(ctx, desiredRS); err != nil {
log.Error(err, "Failed to create runnerreplicaset resource")
return ctrl.Result{}, err
@@ -118,7 +125,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
return ctrl.Result{}, nil
}
desiredTemplateHash, ok := getTemplateHash(desiredRS)
if !ok {
log.Info("Failed to get template hash of desired runnerreplicaset resource. It must be in an invalid state. Please manually delete the runnerreplicaset so that it is recreated")
@@ -126,7 +133,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
}
if newestTemplateHash != desiredTemplateHash {
if err := r.Client.Create(ctx, desiredRS); err != nil {
log.Error(err, "Failed to create runnerreplicaset resource")
return ctrl.Result{}, err
@@ -184,6 +191,23 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
}
}
if rd.Spec.Replicas == nil && desiredRS.Spec.Replicas != nil {
updated := rd.DeepCopy()
updated.Status.Replicas = desiredRS.Spec.Replicas
if (rd.Status.Replicas == nil && *desiredRS.Spec.Replicas > 1) ||
(rd.Status.Replicas != nil && *desiredRS.Spec.Replicas > *rd.Status.Replicas) {
updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()}
}
if err := r.Status().Update(ctx, updated); err != nil {
log.Error(err, "Failed to update runnerdeployment status")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, nil
}
@@ -241,7 +265,7 @@ func CloneAndAddLabel(labels map[string]string, labelKey, labelValue string) map
return newLabels
}
func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment, computedReplicas *int) (*v1alpha1.RunnerReplicaSet, error) {
newRSTemplate := *rd.Spec.Template.DeepCopy()
templateHash := ComputeHash(&newRSTemplate)
// Add template hash label to selector.
@@ -262,11 +286,15 @@ func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeplo
},
}
if computedReplicas != nil {
rs.Spec.Replicas = computedReplicas
}
if err := ctrl.SetControllerReference(&rd, &rs, r.Scheme); err != nil {
return &rs, err
}
return &rs, nil
}
func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
@@ -293,3 +321,36 @@ func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
Owns(&v1alpha1.RunnerReplicaSet{}).
Complete(r)
}
// newRunnerReplicaSetWithAutoscaling computes the desired replica count for
// deployments without a fixed spec.replicas, holding the current count for a
// grace period after the last scale-out to avoid flapping.
func (r *RunnerDeploymentReconciler) newRunnerReplicaSetWithAutoscaling(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) {
var computedReplicas *int
if rd.Spec.Replicas == nil {
replicas, err := r.determineDesiredReplicas(rd)
if err != nil {
return nil, err
}
var scaleDownDelay time.Duration
if rd.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
scaleDownDelay = time.Duration(*rd.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
} else {
scaleDownDelay = 10 * time.Minute
}
now := time.Now()
if rd.Status.Replicas == nil ||
*rd.Status.Replicas < *replicas ||
rd.Status.LastSuccessfulScaleOutTime == nil ||
rd.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) {
computedReplicas = replicas
} else {
computedReplicas = rd.Status.Replicas
}
}
return r.newRunnerReplicaSet(rd, computedReplicas)
}
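To make the grace period concrete: with the default `scaleDownDelay` of 10 minutes, a deployment that scaled out to 3 replicas 4 minutes ago holds at 3 even if only 2 replicas are demanded now, and drops to 2 once the 10 minutes have elapsed. The same condition as a self-contained sketch:

```go
package main

import (
	"fmt"
	"time"
)

// holdCurrent mirrors the grace-period condition above: keep the current
// replica count when it is at least the computed one and the scale-down
// delay since the last successful scale-out has not yet elapsed.
func holdCurrent(current, computed int, lastScaleOut time.Time, delay time.Duration, now time.Time) bool {
	return current >= computed && !lastScaleOut.Add(delay).Before(now)
}

func main() {
	now := time.Now()
	fmt.Println(holdCurrent(3, 2, now.Add(-4*time.Minute), 10*time.Minute, now))  // true: hold at 3
	fmt.Println(holdCurrent(3, 2, now.Add(-11*time.Minute), 10*time.Minute, now)) // false: drop to 2
}
```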