mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-14 13:46:45 +00:00
feat: Repository-wide RunnerDeployment Autoscaling (#57)
* feat: Repository-wide RunnerDeployment Autoscaling This adds `maxReplicas` and `minReplicas` to the RunnerDeploymentSpec. If and only if both fields are set, the controller computes and sets desired `replicas` automatically depending on the demand. The number of demanded runner replicas is computed by `queued workflow runs + in_progress workflow runs` for the repository. The support for organizational runners is not included. Ref https://github.com/summerwind/actions-runner-controller/issues/10
This commit is contained in:
92
controllers/autoscaling.go
Normal file
92
controllers/autoscaling.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type NotSupported struct {
|
||||
}
|
||||
|
||||
var _ error = NotSupported{}
|
||||
|
||||
func (e NotSupported) Error() string {
|
||||
return "Autoscaling is currently supported only when spec.repository is set"
|
||||
}
|
||||
|
||||
func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) {
|
||||
if rd.Spec.Replicas != nil {
|
||||
return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deplomeny with specific replicas")
|
||||
} else if rd.Spec.MinReplicas == nil {
|
||||
return nil, fmt.Errorf("runnerdeployment %s/%s is missing minReplicas", rd.Namespace, rd.Name)
|
||||
} else if rd.Spec.MaxReplicas == nil {
|
||||
return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name)
|
||||
}
|
||||
|
||||
var replicas int
|
||||
|
||||
repoID := rd.Spec.Template.Spec.Repository
|
||||
if repoID == "" {
|
||||
return nil, NotSupported{}
|
||||
}
|
||||
|
||||
repo := strings.Split(repoID, "/")
|
||||
user, repoName := repo[0], repo[1]
|
||||
list, _, err := r.GitHubClient.Actions.ListRepositoryWorkflowRuns(context.TODO(), user, repoName, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var total, inProgress, queued, completed, unknown int
|
||||
|
||||
for _, r := range list.WorkflowRuns {
|
||||
total++
|
||||
|
||||
// In May 2020, there are only 3 statuses.
|
||||
// Follow the below links for more details:
|
||||
// - https://developer.github.com/v3/actions/workflow-runs/#list-repository-workflow-runs
|
||||
// - https://developer.github.com/v3/checks/runs/#create-a-check-run
|
||||
switch r.GetStatus() {
|
||||
case "completed":
|
||||
completed++
|
||||
case "in_progress":
|
||||
inProgress++
|
||||
case "queued":
|
||||
queued++
|
||||
default:
|
||||
unknown++
|
||||
}
|
||||
}
|
||||
|
||||
minReplicas := *rd.Spec.MinReplicas
|
||||
maxReplicas := *rd.Spec.MaxReplicas
|
||||
necessaryReplicas := queued + inProgress
|
||||
|
||||
var desiredReplicas int
|
||||
|
||||
if necessaryReplicas < minReplicas {
|
||||
desiredReplicas = minReplicas
|
||||
} else if necessaryReplicas > maxReplicas {
|
||||
desiredReplicas = maxReplicas
|
||||
} else {
|
||||
desiredReplicas = necessaryReplicas
|
||||
}
|
||||
|
||||
rd.Status.Replicas = &desiredReplicas
|
||||
replicas = desiredReplicas
|
||||
|
||||
r.Log.V(1).Info(
|
||||
"Calculated desired replicas",
|
||||
"computed_replicas_desired", desiredReplicas,
|
||||
"spec_replicas_min", minReplicas,
|
||||
"spec_replicas_max", maxReplicas,
|
||||
"workflow_runs_completed", completed,
|
||||
"workflow_runs_in_progress", inProgress,
|
||||
"workflow_runs_queued", queued,
|
||||
"workflow_runs_unknown", unknown,
|
||||
)
|
||||
|
||||
return &replicas, nil
|
||||
}
|
||||
199
controllers/autoscaling_test.go
Normal file
199
controllers/autoscaling_test.go
Normal file
@@ -0,0 +1,199 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
"github.com/summerwind/actions-runner-controller/github"
|
||||
"github.com/summerwind/actions-runner-controller/github/fake"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log/zap"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func newGithubClient(server *httptest.Server) *github.Client {
|
||||
client, err := github.NewClientWithAccessToken("token")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
baseURL, err := url.Parse(server.URL + "/")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
client.Client.BaseURL = baseURL
|
||||
|
||||
return client
|
||||
}
|
||||
|
||||
func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
||||
intPtr := func(v int) *int {
|
||||
return &v
|
||||
}
|
||||
|
||||
metav1Now := metav1.Now()
|
||||
testcases := []struct {
|
||||
repo string
|
||||
org string
|
||||
fixed *int
|
||||
max *int
|
||||
min *int
|
||||
sReplicas *int
|
||||
sTime *metav1.Time
|
||||
workflowRuns string
|
||||
want int
|
||||
err string
|
||||
}{
|
||||
// 3 demanded, max at 3
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 3,
|
||||
},
|
||||
// 2 demanded, max at 3, currently 3, delay scaling down due to grace period
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(3),
|
||||
sReplicas: intPtr(3),
|
||||
sTime: &metav1Now,
|
||||
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 3,
|
||||
},
|
||||
// 3 demanded, max at 2
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(2),
|
||||
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 2,
|
||||
},
|
||||
// 2 demanded, min at 2
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 3, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 2,
|
||||
},
|
||||
// 1 demanded, min at 2
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}"`,
|
||||
want: 2,
|
||||
},
|
||||
// 1 demanded, min at 2
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(2),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 2,
|
||||
},
|
||||
// 1 demanded, min at 1
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(1),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}"`,
|
||||
want: 1,
|
||||
},
|
||||
// 1 demanded, min at 1
|
||||
{
|
||||
repo: "test/valid",
|
||||
min: intPtr(1),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
want: 1,
|
||||
},
|
||||
// fixed at 3
|
||||
{
|
||||
repo: "test/valid",
|
||||
fixed: intPtr(3),
|
||||
want: 3,
|
||||
},
|
||||
// org runner, fixed at 3
|
||||
{
|
||||
org: "test",
|
||||
fixed: intPtr(3),
|
||||
want: 3,
|
||||
},
|
||||
// org runner, 1 demanded, min at 1
|
||||
{
|
||||
org: "test",
|
||||
min: intPtr(1),
|
||||
max: intPtr(3),
|
||||
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||
err: "Autoscaling is currently supported only when spec.repository is set",
|
||||
},
|
||||
}
|
||||
|
||||
for i := range testcases {
|
||||
tc := testcases[i]
|
||||
|
||||
log := zap.New(func(o *zap.Options) {
|
||||
o.Development = true
|
||||
})
|
||||
|
||||
scheme := runtime.NewScheme()
|
||||
_ = clientgoscheme.AddToScheme(scheme)
|
||||
_ = v1alpha1.AddToScheme(scheme)
|
||||
|
||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
||||
server := fake.NewServer(fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns))
|
||||
defer server.Close()
|
||||
client := newGithubClient(server)
|
||||
|
||||
r := &RunnerDeploymentReconciler{
|
||||
Log: log,
|
||||
GitHubClient: client,
|
||||
Scheme: scheme,
|
||||
}
|
||||
|
||||
rd := v1alpha1.RunnerDeployment{
|
||||
TypeMeta: metav1.TypeMeta{},
|
||||
Spec: v1alpha1.RunnerDeploymentSpec{
|
||||
Template: v1alpha1.RunnerTemplate{
|
||||
Spec: v1alpha1.RunnerSpec{
|
||||
Repository: tc.repo,
|
||||
},
|
||||
},
|
||||
Replicas: tc.fixed,
|
||||
MaxReplicas: tc.max,
|
||||
MinReplicas: tc.min,
|
||||
},
|
||||
Status: v1alpha1.RunnerDeploymentStatus{
|
||||
Replicas: tc.sReplicas,
|
||||
LastSuccessfulScaleOutTime: tc.sTime,
|
||||
},
|
||||
}
|
||||
|
||||
rs, err := r.newRunnerReplicaSetWithAutoscaling(rd)
|
||||
if err != nil {
|
||||
if tc.err == "" {
|
||||
t.Fatalf("unexpected error: expected none, got %v", err)
|
||||
} else if err.Error() != tc.err {
|
||||
t.Fatalf("unexpected error: expected %v, got %v", tc.err, err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
got := rs.Spec.Replicas
|
||||
|
||||
if got == nil {
|
||||
t.Fatalf("unexpected value of rs.Spec.Replicas: nil")
|
||||
}
|
||||
|
||||
if *got != tc.want {
|
||||
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -20,10 +20,12 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/summerwind/actions-runner-controller/github"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
"github.com/go-logr/logr"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
@@ -47,9 +49,10 @@ const (
|
||||
// RunnerDeploymentReconciler reconciles a Runner object
|
||||
type RunnerDeploymentReconciler struct {
|
||||
client.Client
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
GitHubClient *github.Client
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
}
|
||||
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;create;update;patch;delete
|
||||
@@ -94,15 +97,19 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
oldSets = myRunnerReplicaSets[1:]
|
||||
}
|
||||
|
||||
desiredRS, err := r.newRunnerReplicaSet(rd)
|
||||
desiredRS, err := r.newRunnerReplicaSetWithAutoscaling(rd)
|
||||
if err != nil {
|
||||
if _, ok := err.(NotSupported); ok {
|
||||
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetAutoScaleNotSupported", err.Error())
|
||||
}
|
||||
|
||||
log.Error(err, "Could not create runnerreplicaset")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
if newestSet == nil {
|
||||
if err := r.Client.Create(ctx, &desiredRS); err != nil {
|
||||
if err := r.Client.Create(ctx, desiredRS); err != nil {
|
||||
log.Error(err, "Failed to create runnerreplicaset resource")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
@@ -118,7 +125,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
desiredTemplateHash, ok := getTemplateHash(&desiredRS)
|
||||
desiredTemplateHash, ok := getTemplateHash(desiredRS)
|
||||
if !ok {
|
||||
log.Info("Failed to get template hash of desired runnerreplicaset resource. It must be in an invalid state. Please manually delete the runnerreplicaset so that it is recreated")
|
||||
|
||||
@@ -126,7 +133,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
}
|
||||
|
||||
if newestTemplateHash != desiredTemplateHash {
|
||||
if err := r.Client.Create(ctx, &desiredRS); err != nil {
|
||||
if err := r.Client.Create(ctx, desiredRS); err != nil {
|
||||
log.Error(err, "Failed to create runnerreplicaset resource")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
@@ -184,6 +191,23 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
}
|
||||
}
|
||||
|
||||
if rd.Spec.Replicas == nil && desiredRS.Spec.Replicas != nil {
|
||||
updated := rd.DeepCopy()
|
||||
updated.Status.Replicas = desiredRS.Spec.Replicas
|
||||
|
||||
if (rd.Status.Replicas == nil && *desiredRS.Spec.Replicas > 1) ||
|
||||
(rd.Status.Replicas != nil && *desiredRS.Spec.Replicas > *rd.Status.Replicas) {
|
||||
|
||||
updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()}
|
||||
}
|
||||
|
||||
if err := r.Status().Update(ctx, updated); err != nil {
|
||||
log.Error(err, "Failed to update runnerdeployment status")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
}
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
@@ -241,7 +265,7 @@ func CloneAndAddLabel(labels map[string]string, labelKey, labelValue string) map
|
||||
return newLabels
|
||||
}
|
||||
|
||||
func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment) (v1alpha1.RunnerReplicaSet, error) {
|
||||
func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment, computedReplicas *int) (*v1alpha1.RunnerReplicaSet, error) {
|
||||
newRSTemplate := *rd.Spec.Template.DeepCopy()
|
||||
templateHash := ComputeHash(&newRSTemplate)
|
||||
// Add template hash label to selector.
|
||||
@@ -262,11 +286,15 @@ func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeplo
|
||||
},
|
||||
}
|
||||
|
||||
if err := ctrl.SetControllerReference(&rd, &rs, r.Scheme); err != nil {
|
||||
return rs, err
|
||||
if computedReplicas != nil {
|
||||
rs.Spec.Replicas = computedReplicas
|
||||
}
|
||||
|
||||
return rs, nil
|
||||
if err := ctrl.SetControllerReference(&rd, &rs, r.Scheme); err != nil {
|
||||
return &rs, err
|
||||
}
|
||||
|
||||
return &rs, nil
|
||||
}
|
||||
|
||||
func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
@@ -293,3 +321,36 @@ func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
Owns(&v1alpha1.RunnerReplicaSet{}).
|
||||
Complete(r)
|
||||
}
|
||||
|
||||
func (r *RunnerDeploymentReconciler) newRunnerReplicaSetWithAutoscaling(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) {
|
||||
var computedReplicas *int
|
||||
|
||||
if rd.Spec.Replicas == nil {
|
||||
replicas, err := r.determineDesiredReplicas(rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var scaleDownDelay time.Duration
|
||||
|
||||
if rd.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
|
||||
scaleDownDelay = time.Duration(*rd.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
|
||||
} else {
|
||||
scaleDownDelay = 10 * time.Minute
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
|
||||
if rd.Status.Replicas == nil ||
|
||||
*rd.Status.Replicas < *replicas ||
|
||||
rd.Status.LastSuccessfulScaleOutTime == nil ||
|
||||
rd.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) {
|
||||
|
||||
computedReplicas = replicas
|
||||
} else {
|
||||
computedReplicas = rd.Status.Replicas
|
||||
}
|
||||
}
|
||||
|
||||
return r.newRunnerReplicaSet(rd, computedReplicas)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user