mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-10 11:41:27 +00:00
Compare commits
19 Commits
actions-ru
...
v0.18.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bc6e499e4f | ||
|
|
07f822bb08 | ||
|
|
3a0332dfdc | ||
|
|
f6ab66c55b | ||
|
|
d874a5cfda | ||
|
|
c424215044 | ||
|
|
c5fdfd63db | ||
|
|
23a45eaf87 | ||
|
|
dee997b44e | ||
|
|
2929a739e3 | ||
|
|
3cccca8d09 | ||
|
|
7a7086e7aa | ||
|
|
565b14a148 | ||
|
|
ecc441de3f | ||
|
|
25335bb3c3 | ||
|
|
9b871567b1 | ||
|
|
264cf494e3 | ||
|
|
3f23501b8e | ||
|
|
5530030c67 |
@@ -292,7 +292,7 @@ A `RunnerDeployment` can scale the number of runners between `minReplicas` and `
|
||||
|
||||
**TotalNumberOfQueuedAndInProgressWorkflowRuns**
|
||||
|
||||
In the below example, `actions-runner` will pole GitHub for all pending workflows with the pole period defined by the sync period configuration. It will then scale to e.g. 3 if there're 3 pending jobs at sync time.
|
||||
In the below example, `actions-runner` will poll GitHub for all pending workflows with the poll period defined by the sync period configuration. It will then scale to e.g. 3 if there're 3 pending jobs at sync time.
|
||||
With this scaling metric we are required to define a list of repositories within our metric.
|
||||
|
||||
The scale-out performance is controlled via the manager container's startup `--sync-period` argument. The default value is set to 10 minutes to prevent default deployments from getting themselves rate limited by the GitHub API.
|
||||
@@ -349,7 +349,7 @@ spec:
|
||||
|
||||
**PercentageRunnersBusy**
|
||||
|
||||
The `HorizontalRunnerAutoscaler` will pole GitHub based on the configuration sync period for the number of busy runners which live in the RunnerDeployment's namespace and scale based on the settings
|
||||
The `HorizontalRunnerAutoscaler` will poll GitHub based on the configuration sync period for the number of busy runners which live in the RunnerDeployment's namespace and scale based on the settings
|
||||
|
||||
**Kustomize Config :** The period can be customised in the `config/default/manager_auth_proxy_patch.yaml` patch<br />
|
||||
**Helm Config :** `syncPeriod`
|
||||
|
||||
@@ -156,6 +156,7 @@ type HorizontalRunnerAutoscalerStatus struct {
|
||||
DesiredReplicas *int `json:"desiredReplicas,omitempty"`
|
||||
|
||||
// +optional
|
||||
// +nullable
|
||||
LastSuccessfulScaleOutTime *metav1.Time `json:"lastSuccessfulScaleOutTime,omitempty"`
|
||||
|
||||
// +optional
|
||||
|
||||
@@ -121,10 +121,17 @@ func (rs *RunnerSpec) ValidateRepository() error {
|
||||
|
||||
// RunnerStatus defines the observed state of Runner
|
||||
type RunnerStatus struct {
|
||||
// +optional
|
||||
Registration RunnerStatusRegistration `json:"registration"`
|
||||
Phase string `json:"phase"`
|
||||
Reason string `json:"reason"`
|
||||
Message string `json:"message"`
|
||||
// +optional
|
||||
Phase string `json:"phase,omitempty"`
|
||||
// +optional
|
||||
Reason string `json:"reason,omitempty"`
|
||||
// +optional
|
||||
Message string `json:"message,omitempty"`
|
||||
// +optional
|
||||
// +nullable
|
||||
LastRegistrationCheckTime *metav1.Time `json:"lastRegistrationCheckTime,omitempty"`
|
||||
}
|
||||
|
||||
// RunnerStatusRegistration contains runner registration status
|
||||
|
||||
@@ -715,6 +715,10 @@ func (in *RunnerSpec) DeepCopy() *RunnerSpec {
|
||||
func (in *RunnerStatus) DeepCopyInto(out *RunnerStatus) {
|
||||
*out = *in
|
||||
in.Registration.DeepCopyInto(&out.Registration)
|
||||
if in.LastRegistrationCheckTime != nil {
|
||||
in, out := &in.LastRegistrationCheckTime, &out.LastRegistrationCheckTime
|
||||
*out = (*in).DeepCopy()
|
||||
}
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunnerStatus.
|
||||
|
||||
@@ -15,7 +15,7 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.9.0
|
||||
version: 0.10.4
|
||||
|
||||
home: https://github.com/summerwind/actions-runner-controller
|
||||
|
||||
|
||||
@@ -207,6 +207,7 @@ spec:
|
||||
type: integer
|
||||
lastSuccessfulScaleOutTime:
|
||||
format: date-time
|
||||
nullable: true
|
||||
type: string
|
||||
observedGeneration:
|
||||
description: ObservedGeneration is the most recent generation observed
|
||||
|
||||
@@ -1541,6 +1541,10 @@ spec:
|
||||
status:
|
||||
description: RunnerStatus defines the observed state of Runner
|
||||
properties:
|
||||
lastRegistrationCheckTime:
|
||||
format: date-time
|
||||
nullable: true
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
phase:
|
||||
@@ -1569,11 +1573,6 @@ spec:
|
||||
- expiresAt
|
||||
- token
|
||||
type: object
|
||||
required:
|
||||
- message
|
||||
- phase
|
||||
- reason
|
||||
- registration
|
||||
type: object
|
||||
type: object
|
||||
version: v1alpha1
|
||||
|
||||
@@ -5,7 +5,7 @@ apiVersion: cert-manager.io/v1
|
||||
kind: Issuer
|
||||
metadata:
|
||||
name: {{ include "actions-runner-controller.selfsignedIssuerName" . }}
|
||||
namespace: {{ .Namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
selfSigned: {}
|
||||
---
|
||||
@@ -13,7 +13,7 @@ apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: {{ include "actions-runner-controller.servingCertName" . }}
|
||||
namespace: {{ .Namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
dnsNames:
|
||||
- {{ include "actions-runner-controller.webhookServiceName" . }}.{{ .Release.Namespace }}.svc
|
||||
|
||||
@@ -207,6 +207,7 @@ spec:
|
||||
type: integer
|
||||
lastSuccessfulScaleOutTime:
|
||||
format: date-time
|
||||
nullable: true
|
||||
type: string
|
||||
observedGeneration:
|
||||
description: ObservedGeneration is the most recent generation observed
|
||||
|
||||
@@ -1541,6 +1541,10 @@ spec:
|
||||
status:
|
||||
description: RunnerStatus defines the observed state of Runner
|
||||
properties:
|
||||
lastRegistrationCheckTime:
|
||||
format: date-time
|
||||
nullable: true
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
phase:
|
||||
@@ -1569,11 +1573,6 @@ spec:
|
||||
- expiresAt
|
||||
- token
|
||||
type: object
|
||||
required:
|
||||
- message
|
||||
- phase
|
||||
- reason
|
||||
- registration
|
||||
type: object
|
||||
type: object
|
||||
version: v1alpha1
|
||||
|
||||
@@ -34,7 +34,7 @@ func getValueAvailableAt(now time.Time, from, to *time.Time, reservedValue int)
|
||||
return &reservedValue
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) getDesiredReplicasFromCache(hra v1alpha1.HorizontalRunnerAutoscaler) *int {
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) fetchSuggestedReplicasFromCache(hra v1alpha1.HorizontalRunnerAutoscaler) *int {
|
||||
var entry *v1alpha1.CacheEntry
|
||||
|
||||
for i := range hra.Status.CacheEntries {
|
||||
@@ -63,7 +63,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) getDesiredReplicasFromCache(hra v
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
if hra.Spec.MinReplicas == nil {
|
||||
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
|
||||
} else if hra.Spec.MaxReplicas == nil {
|
||||
@@ -71,16 +71,22 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
||||
}
|
||||
|
||||
metrics := hra.Spec.Metrics
|
||||
if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
||||
return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
|
||||
if len(metrics) == 0 {
|
||||
if len(hra.Spec.ScaleUpTriggers) == 0 {
|
||||
return r.suggestReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
||||
return r.suggestReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
|
||||
} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
|
||||
return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
|
||||
return r.suggestReplicasByPercentageRunnersBusy(rd, hra)
|
||||
} else {
|
||||
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
|
||||
var repos [][]string
|
||||
metrics := hra.Spec.Metrics
|
||||
@@ -95,7 +101,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro
|
||||
// we assume that the desired replicas should always be `minReplicas + capacityReservedThroughWebhook`.
|
||||
// See https://github.com/summerwind/actions-runner-controller/issues/377#issuecomment-793372693
|
||||
if len(metrics) == 0 {
|
||||
return hra.Spec.MinReplicas, nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if len(metrics[0].RepositoryNames) == 0 {
|
||||
@@ -172,28 +178,10 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro
|
||||
}
|
||||
}
|
||||
|
||||
minReplicas := *hra.Spec.MinReplicas
|
||||
maxReplicas := *hra.Spec.MaxReplicas
|
||||
necessaryReplicas := queued + inProgress
|
||||
|
||||
var desiredReplicas int
|
||||
|
||||
if necessaryReplicas < minReplicas {
|
||||
desiredReplicas = minReplicas
|
||||
} else if necessaryReplicas > maxReplicas {
|
||||
desiredReplicas = maxReplicas
|
||||
} else {
|
||||
desiredReplicas = necessaryReplicas
|
||||
}
|
||||
|
||||
rd.Status.Replicas = &desiredReplicas
|
||||
replicas := desiredReplicas
|
||||
|
||||
r.Log.V(1).Info(
|
||||
"Calculated desired replicas",
|
||||
"computed_replicas_desired", desiredReplicas,
|
||||
"spec_replicas_min", minReplicas,
|
||||
"spec_replicas_max", maxReplicas,
|
||||
fmt.Sprintf("Suggested desired replicas of %d by TotalNumberOfQueuedAndInProgressWorkflowRuns", necessaryReplicas),
|
||||
"workflow_runs_completed", completed,
|
||||
"workflow_runs_in_progress", inProgress,
|
||||
"workflow_runs_queued", queued,
|
||||
@@ -203,13 +191,11 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInPro
|
||||
"horizontal_runner_autoscaler", hra.Name,
|
||||
)
|
||||
|
||||
return &replicas, nil
|
||||
return &necessaryReplicas, nil
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
ctx := context.Background()
|
||||
minReplicas := *hra.Spec.MinReplicas
|
||||
maxReplicas := *hra.Spec.MaxReplicas
|
||||
metrics := hra.Spec.Metrics[0]
|
||||
scaleUpThreshold := defaultScaleUpThreshold
|
||||
scaleDownThreshold := defaultScaleDownThreshold
|
||||
@@ -357,21 +343,13 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunn
|
||||
desiredReplicas = *rd.Spec.Replicas
|
||||
}
|
||||
|
||||
if desiredReplicas < minReplicas {
|
||||
desiredReplicas = minReplicas
|
||||
} else if desiredReplicas > maxReplicas {
|
||||
desiredReplicas = maxReplicas
|
||||
}
|
||||
|
||||
// NOTES for operators:
|
||||
//
|
||||
// - num_runners can be twice as large as replicas_desired_before while
|
||||
// the runnerdeployment controller is replacing RunnerReplicaSet for runner update.
|
||||
|
||||
r.Log.V(1).Info(
|
||||
"Calculated desired replicas",
|
||||
"replicas_min", minReplicas,
|
||||
"replicas_max", maxReplicas,
|
||||
fmt.Sprintf("Suggested desired replicas of %d by PercentageRunnersBusy", desiredReplicas),
|
||||
"replicas_desired_before", desiredReplicasBefore,
|
||||
"replicas_desired", desiredReplicas,
|
||||
"num_runners", numRunners,
|
||||
@@ -385,8 +363,5 @@ func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunn
|
||||
"repository", repository,
|
||||
)
|
||||
|
||||
rd.Status.Replicas = &desiredReplicas
|
||||
replicas := desiredReplicas
|
||||
|
||||
return &replicas, nil
|
||||
return &desiredReplicas, nil
|
||||
}
|
||||
|
||||
@@ -224,7 +224,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, err := h.computeReplicas(rd, hra)
|
||||
got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, rd, hra)
|
||||
if err != nil {
|
||||
if tc.err == "" {
|
||||
t.Fatalf("unexpected error: expected none, got %v", err)
|
||||
@@ -234,12 +234,8 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
if got == nil {
|
||||
t.Fatalf("unexpected value of rs.Spec.Replicas: nil")
|
||||
}
|
||||
|
||||
if *got != tc.want {
|
||||
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got)
|
||||
if got != tc.want {
|
||||
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -424,6 +420,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
||||
_ = v1alpha1.AddToScheme(scheme)
|
||||
|
||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
server := fake.NewServer(
|
||||
fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns, tc.workflowRuns_queued, tc.workflowRuns_in_progress),
|
||||
fake.WithListWorkflowJobsResponse(200, tc.workflowJobs),
|
||||
@@ -485,7 +483,7 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
got, err := h.computeReplicas(rd, hra)
|
||||
got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, rd, hra)
|
||||
if err != nil {
|
||||
if tc.err == "" {
|
||||
t.Fatalf("unexpected error: expected none, got %v", err)
|
||||
@@ -495,12 +493,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
if got == nil {
|
||||
t.Fatalf("unexpected value of rs.Spec.Replicas: nil, wanted %v", tc.want)
|
||||
}
|
||||
|
||||
if *got != tc.want {
|
||||
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got)
|
||||
if got != tc.want {
|
||||
t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ package controllers
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"time"
|
||||
|
||||
"github.com/summerwind/actions-runner-controller/github"
|
||||
@@ -30,10 +31,10 @@ import (
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
"github.com/summerwind/actions-runner-controller/controllers/metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -52,6 +53,8 @@ type HorizontalRunnerAutoscalerReconciler struct {
|
||||
Name string
|
||||
}
|
||||
|
||||
const defaultReplicas = 1
|
||||
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers,verbs=get;list;watch;create;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers/finalizers,verbs=get;list;watch;create;update;patch;delete
|
||||
@@ -71,6 +74,8 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
metrics.SetHorizontalRunnerAutoscalerSpec(hra.ObjectMeta, hra.Spec)
|
||||
|
||||
var rd v1alpha1.RunnerDeployment
|
||||
if err := r.Get(ctx, types.NamespacedName{
|
||||
Namespace: req.Namespace,
|
||||
@@ -83,41 +88,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
var replicas *int
|
||||
|
||||
replicasFromCache := r.getDesiredReplicasFromCache(hra)
|
||||
|
||||
if replicasFromCache != nil {
|
||||
replicas = replicasFromCache
|
||||
} else {
|
||||
var err error
|
||||
|
||||
replicas, err = r.computeReplicas(rd, hra)
|
||||
if err != nil {
|
||||
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
||||
|
||||
log.Error(err, "Could not compute replicas")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
}
|
||||
|
||||
const defaultReplicas = 1
|
||||
|
||||
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
|
||||
newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
|
||||
|
||||
now := time.Now()
|
||||
|
||||
for _, reservation := range hra.Spec.CapacityReservations {
|
||||
if reservation.ExpirationTime.Time.After(now) {
|
||||
newDesiredReplicas += reservation.Replicas
|
||||
}
|
||||
newDesiredReplicas, computedReplicas, computedReplicasFromCache, err := r.computeReplicasWithCache(log, now, rd, hra)
|
||||
if err != nil {
|
||||
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
||||
|
||||
log.Error(err, "Could not compute replicas")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
if hra.Spec.MaxReplicas != nil && *hra.Spec.MaxReplicas < newDesiredReplicas {
|
||||
newDesiredReplicas = *hra.Spec.MaxReplicas
|
||||
}
|
||||
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
|
||||
|
||||
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
||||
if currentDesiredReplicas != newDesiredReplicas {
|
||||
@@ -143,7 +125,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
|
||||
updated.Status.DesiredReplicas = &newDesiredReplicas
|
||||
}
|
||||
|
||||
if replicasFromCache == nil {
|
||||
if computedReplicasFromCache == nil {
|
||||
if updated == nil {
|
||||
updated = hra.DeepCopy()
|
||||
}
|
||||
@@ -160,12 +142,14 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
|
||||
|
||||
updated.Status.CacheEntries = append(cacheEntries, v1alpha1.CacheEntry{
|
||||
Key: v1alpha1.CacheEntryKeyDesiredReplicas,
|
||||
Value: *replicas,
|
||||
Value: computedReplicas,
|
||||
ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)},
|
||||
})
|
||||
}
|
||||
|
||||
if updated != nil {
|
||||
metrics.SetHorizontalRunnerAutoscalerStatus(updated.ObjectMeta, updated.Status)
|
||||
|
||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&hra)); err != nil {
|
||||
return ctrl.Result{}, fmt.Errorf("patching horizontalrunnerautoscaler status to add cache entry: %w", err)
|
||||
}
|
||||
@@ -200,14 +184,59 @@ func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager
|
||||
Complete(r)
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
var computedReplicas *int
|
||||
|
||||
replicas, err := r.determineDesiredReplicas(rd, hra)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr.Logger, now time.Time, rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (int, int, *int, error) {
|
||||
minReplicas := defaultReplicas
|
||||
if hra.Spec.MinReplicas != nil && *hra.Spec.MinReplicas > 0 {
|
||||
minReplicas = *hra.Spec.MinReplicas
|
||||
}
|
||||
|
||||
var suggestedReplicas int
|
||||
|
||||
suggestedReplicasFromCache := r.fetchSuggestedReplicasFromCache(hra)
|
||||
|
||||
var cached *int
|
||||
|
||||
if suggestedReplicasFromCache != nil {
|
||||
cached = suggestedReplicasFromCache
|
||||
|
||||
if cached == nil {
|
||||
suggestedReplicas = minReplicas
|
||||
} else {
|
||||
suggestedReplicas = *cached
|
||||
}
|
||||
} else {
|
||||
v, err := r.suggestDesiredReplicas(rd, hra)
|
||||
if err != nil {
|
||||
return 0, 0, nil, err
|
||||
}
|
||||
|
||||
if v == nil {
|
||||
suggestedReplicas = minReplicas
|
||||
} else {
|
||||
suggestedReplicas = *v
|
||||
}
|
||||
}
|
||||
|
||||
var reserved int
|
||||
|
||||
for _, reservation := range hra.Spec.CapacityReservations {
|
||||
if reservation.ExpirationTime.Time.After(now) {
|
||||
reserved += reservation.Replicas
|
||||
}
|
||||
}
|
||||
|
||||
newDesiredReplicas := suggestedReplicas + reserved
|
||||
|
||||
if newDesiredReplicas < minReplicas {
|
||||
newDesiredReplicas = minReplicas
|
||||
} else if hra.Spec.MaxReplicas != nil && newDesiredReplicas > *hra.Spec.MaxReplicas {
|
||||
newDesiredReplicas = *hra.Spec.MaxReplicas
|
||||
}
|
||||
|
||||
//
|
||||
// Delay scaling-down for ScaleDownDelaySecondsAfterScaleUp or DefaultScaleDownDelay
|
||||
//
|
||||
|
||||
var scaleDownDelay time.Duration
|
||||
|
||||
if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
|
||||
@@ -216,17 +245,50 @@ func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.Runne
|
||||
scaleDownDelay = DefaultScaleDownDelay
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
var scaleDownDelayUntil *time.Time
|
||||
|
||||
if hra.Status.DesiredReplicas == nil ||
|
||||
*hra.Status.DesiredReplicas < *replicas ||
|
||||
hra.Status.LastSuccessfulScaleOutTime == nil ||
|
||||
hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) {
|
||||
*hra.Status.DesiredReplicas < newDesiredReplicas ||
|
||||
hra.Status.LastSuccessfulScaleOutTime == nil {
|
||||
|
||||
computedReplicas = replicas
|
||||
} else if hra.Status.LastSuccessfulScaleOutTime != nil {
|
||||
t := hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay)
|
||||
|
||||
// The scale-down delay has not elapsed yet, so keep the current desired replicas
|
||||
if t.After(now) {
|
||||
scaleDownDelayUntil = &t
|
||||
newDesiredReplicas = *hra.Status.DesiredReplicas
|
||||
}
|
||||
} else {
|
||||
computedReplicas = hra.Status.DesiredReplicas
|
||||
newDesiredReplicas = *hra.Status.DesiredReplicas
|
||||
}
|
||||
|
||||
return computedReplicas, nil
|
||||
//
|
||||
// Logs various numbers for monitoring and debugging purpose
|
||||
//
|
||||
|
||||
kvs := []interface{}{
|
||||
"suggested", suggestedReplicas,
|
||||
"reserved", reserved,
|
||||
"min", minReplicas,
|
||||
}
|
||||
|
||||
if cached != nil {
|
||||
kvs = append(kvs, "cached", *cached)
|
||||
}
|
||||
|
||||
if scaleDownDelayUntil != nil {
|
||||
kvs = append(kvs, "last_scale_up_time", *hra.Status.LastSuccessfulScaleOutTime)
|
||||
kvs = append(kvs, "scale_down_delay_until", scaleDownDelayUntil)
|
||||
}
|
||||
|
||||
if maxReplicas := hra.Spec.MaxReplicas; maxReplicas != nil {
|
||||
kvs = append(kvs, "max", *maxReplicas)
|
||||
}
|
||||
|
||||
log.V(1).Info(fmt.Sprintf("Calculated desired replicas of %d", newDesiredReplicas),
|
||||
kvs...,
|
||||
)
|
||||
|
||||
return newDesiredReplicas, suggestedReplicas, suggestedReplicasFromCache, nil
|
||||
}
|
||||
|
||||
@@ -71,7 +71,9 @@ func SetupIntegrationTest(ctx context.Context) *testEnvironment {
|
||||
err := k8sClient.Create(ctx, ns)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create test namespace")
|
||||
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
|
||||
Namespace: ns.Name,
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create manager")
|
||||
|
||||
responses := &fake.FixedResponses{}
|
||||
@@ -97,6 +99,21 @@ func SetupIntegrationTest(ctx context.Context) *testEnvironment {
|
||||
return fmt.Sprintf("%s%s", ns.Name, name)
|
||||
}
|
||||
|
||||
runnerController := &RunnerReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: scheme.Scheme,
|
||||
Log: logf.Log,
|
||||
Recorder: mgr.GetEventRecorderFor("runnerreplicaset-controller"),
|
||||
GitHubClient: env.ghClient,
|
||||
RunnerImage: "example/runner:test",
|
||||
DockerImage: "example/docker:test",
|
||||
Name: controllerName("runner"),
|
||||
RegistrationRecheckInterval: time.Millisecond,
|
||||
RegistrationRecheckJitter: time.Millisecond,
|
||||
}
|
||||
err = runnerController.SetupWithManager(mgr)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup runner controller")
|
||||
|
||||
replicasetController := &RunnerReplicaSetReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: scheme.Scheme,
|
||||
@@ -106,7 +123,7 @@ func SetupIntegrationTest(ctx context.Context) *testEnvironment {
|
||||
Name: controllerName("runnerreplicaset"),
|
||||
}
|
||||
err = replicasetController.SetupWithManager(mgr)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup controller")
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup runnerreplicaset controller")
|
||||
|
||||
deploymentsController := &RunnerDeploymentReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
@@ -116,7 +133,7 @@ func SetupIntegrationTest(ctx context.Context) *testEnvironment {
|
||||
Name: controllerName("runnnerdeployment"),
|
||||
}
|
||||
err = deploymentsController.SetupWithManager(mgr)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup controller")
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup runnerdeployment controller")
|
||||
|
||||
autoscalerController := &HorizontalRunnerAutoscalerReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
@@ -128,7 +145,7 @@ func SetupIntegrationTest(ctx context.Context) *testEnvironment {
|
||||
Name: controllerName("horizontalrunnerautoscaler"),
|
||||
}
|
||||
err = autoscalerController.SetupWithManager(mgr)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup controller")
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to setup autoscaler controller")
|
||||
|
||||
autoscalerWebhook := &HorizontalRunnerAutoscalerGitHubWebhook{
|
||||
Client: mgr.GetClient(),
|
||||
@@ -322,7 +339,11 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(3),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
Metrics: nil,
|
||||
Metrics: []actionsv1alpha1.MetricSpec{
|
||||
{
|
||||
Type: actionsv1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns,
|
||||
},
|
||||
},
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
@@ -447,7 +468,11 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(5),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
Metrics: nil,
|
||||
Metrics: []actionsv1alpha1.MetricSpec{
|
||||
{
|
||||
Type: actionsv1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns,
|
||||
},
|
||||
},
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
@@ -467,10 +492,9 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3)
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
|
||||
env.SyncRunnerRegistrations()
|
||||
ExpectRunnerCountEventuallyEquals(ctx, ns.Name, 3)
|
||||
}
|
||||
|
||||
// Scale-up to 4 replicas on first check_run create webhook event
|
||||
@@ -478,19 +502,123 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 4, "runners after first webhook event")
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(4, "count of fake list runners")
|
||||
env.SyncRunnerRegistrations()
|
||||
ExpectRunnerCountEventuallyEquals(ctx, ns.Name, 4)
|
||||
}
|
||||
|
||||
// Scale-up to 5 replicas on second check_run create webhook event
|
||||
{
|
||||
env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 5, "runners after second webhook event")
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(5, "count of fake list runners")
|
||||
env.SyncRunnerRegistrations()
|
||||
ExpectRunnerCountEventuallyEquals(ctx, ns.Name, 5)
|
||||
}
|
||||
})
|
||||
|
||||
It("should create and scale organization's repository runners only on check_run event", func() {
|
||||
name := "example-runnerdeploy"
|
||||
|
||||
{
|
||||
rd := &actionsv1alpha1.RunnerDeployment{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||
Replicas: intPtr(1),
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Template: actionsv1alpha1.RunnerTemplate{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Group: "baz",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(5, "count of fake list runners")
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Stay at 1 replica (minReplicas): this HorizontalRunnerAutoscaler sets ScaleUpTriggers but no Metrics,
|
||||
// so the TotalNumberOfQueuedAndInProgressWorkflowRuns-based scaling is not used here and
|
||||
// replicas are only added by the check_run webhook events sent below.
|
||||
{
|
||||
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||
Name: name,
|
||||
},
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(5),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
CheckRun: &actionsv1alpha1.CheckRunSpec{
|
||||
Types: []string{"created"},
|
||||
Status: "pending",
|
||||
},
|
||||
},
|
||||
Amount: 1,
|
||||
Duration: metav1.Duration{Duration: time.Minute},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Scale-up to 2 replicas on first check_run create webhook event
|
||||
{
|
||||
env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Scale-up to 3 replicas on second check_run create webhook event
|
||||
{
|
||||
env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
|
||||
}
|
||||
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
|
||||
})
|
||||
|
||||
It("should create and scale user's repository runners on pull_request event", func() {
|
||||
@@ -554,7 +682,11 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(3),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
Metrics: nil,
|
||||
Metrics: []actionsv1alpha1.MetricSpec{
|
||||
{
|
||||
Type: actionsv1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns,
|
||||
},
|
||||
},
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
@@ -622,6 +754,99 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
}
|
||||
})
|
||||
|
||||
It("should create and scale user's repository runners only on pull_request event", func() {
|
||||
name := "example-runnerdeploy"
|
||||
|
||||
{
|
||||
rd := &actionsv1alpha1.RunnerDeployment{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||
Replicas: intPtr(1),
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Template: actionsv1alpha1.RunnerTemplate{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Group: "baz",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
{
|
||||
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||
Name: name,
|
||||
},
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(3),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
PullRequest: &actionsv1alpha1.PullRequestSpec{
|
||||
Types: []string{"created"},
|
||||
Branches: []string{"main"},
|
||||
},
|
||||
},
|
||||
Amount: 1,
|
||||
Duration: metav1.Duration{Duration: time.Minute},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake runners after HRA creation")
|
||||
}
|
||||
|
||||
// Scale-up to 2 replicas on first pull_request create webhook event
|
||||
{
|
||||
env.SendUserPullRequestEvent("test", "valid", "main", "created")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||
ExpectHRADesiredReplicasEquals(ctx, ns.Name, name, 2, "runner deployment desired replicas")
|
||||
}
|
||||
|
||||
// Scale-up to 3 replicas on second pull_request create webhook event
|
||||
{
|
||||
env.SendUserPullRequestEvent("test", "valid", "main", "created")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
|
||||
ExpectHRADesiredReplicasEquals(ctx, ns.Name, name, 3, "runner deployment desired replicas")
|
||||
}
|
||||
})
|
||||
|
||||
It("should create and scale user's repository runners on check_run event", func() {
|
||||
name := "example-runnerdeploy"
|
||||
|
||||
@@ -681,7 +906,11 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(5),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
Metrics: nil,
|
||||
Metrics: []actionsv1alpha1.MetricSpec{
|
||||
{
|
||||
Type: actionsv1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns,
|
||||
},
|
||||
},
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
@@ -727,6 +956,110 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(5, "count of fake list runners")
|
||||
})
|
||||
|
||||
It("should create and scale user's repository runners only on check_run event", func() {
|
||||
name := "example-runnerdeploy"
|
||||
|
||||
{
|
||||
rd := &actionsv1alpha1.RunnerDeployment{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||
Replicas: intPtr(1),
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Template: actionsv1alpha1.RunnerTemplate{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Group: "baz",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Create an HRA that has only a check_run scale-up trigger and no Metrics.
|
||||
// Unlike the tests that set TotalNumberOfQueuedAndInProgressWorkflowRuns explicitly, the assertions below
|
||||
// expect the replica count to stay at 1 until a webhook event is received.
|
||||
{
|
||||
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns.Name,
|
||||
},
|
||||
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||
Name: name,
|
||||
},
|
||||
MinReplicas: intPtr(1),
|
||||
MaxReplicas: intPtr(5),
|
||||
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||
{
|
||||
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||
CheckRun: &actionsv1alpha1.CheckRunSpec{
|
||||
Types: []string{"created"},
|
||||
Status: "pending",
|
||||
},
|
||||
},
|
||||
Amount: 1,
|
||||
Duration: metav1.Duration{Duration: time.Minute},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Scale-up to 2 replicas on first check_run create webhook event
|
||||
{
|
||||
env.SendUserCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||
}
|
||||
|
||||
{
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
|
||||
}
|
||||
|
||||
// Scale-up to 3 replicas on second check_run create webhook event
|
||||
{
|
||||
env.SendUserCheckRunEvent("test", "valid", "pending", "created")
|
||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
|
||||
}
|
||||
|
||||
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
|
||||
})
|
||||
|
||||
})
|
||||
})
|
||||
|
||||
@@ -911,6 +1244,44 @@ func ExpectRunnerSetsCountEventuallyEquals(ctx context.Context, ns string, count
|
||||
time.Second*10, time.Millisecond*500).Should(BeEquivalentTo(count), optionalDescription...)
|
||||
}
|
||||
|
||||
func ExpectRunnerCountEventuallyEquals(ctx context.Context, ns string, count int, optionalDescription ...interface{}) {
|
||||
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
|
||||
|
||||
EventuallyWithOffset(
|
||||
1,
|
||||
func() int {
|
||||
err := k8sClient.List(ctx, &runners, client.InNamespace(ns))
|
||||
if err != nil {
|
||||
logf.Log.Error(err, "list runner sets")
|
||||
}
|
||||
|
||||
var running int
|
||||
|
||||
for _, r := range runners.Items {
|
||||
if r.Status.Phase == string(corev1.PodRunning) {
|
||||
running++
|
||||
} else {
|
||||
var pod corev1.Pod
|
||||
if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns, Name: r.Name}, &pod); err != nil {
|
||||
logf.Log.Error(err, "simulating pod controller")
|
||||
continue
|
||||
}
|
||||
|
||||
copy := pod.DeepCopy()
|
||||
copy.Status.Phase = corev1.PodRunning
|
||||
|
||||
if err := k8sClient.Status().Patch(ctx, copy, client.MergeFrom(&pod)); err != nil {
|
||||
logf.Log.Error(err, "simulating pod controller")
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return running
|
||||
},
|
||||
time.Second*10, time.Millisecond*500).Should(BeEquivalentTo(count), optionalDescription...)
|
||||
}
|
||||
|
||||
func ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx context.Context, ns string, count int, optionalDescription ...interface{}) {
|
||||
runnerSets := actionsv1alpha1.RunnerReplicaSetList{Items: []actionsv1alpha1.RunnerReplicaSet{}}
|
||||
|
||||
|
||||
67
controllers/metrics/horizontalrunnerautoscaler.go
Normal file
67
controllers/metrics/horizontalrunnerautoscaler.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
hraName = "horizontalrunnerautoscaler"
|
||||
hraNamespace = "namespace"
|
||||
)
|
||||
|
||||
var (
|
||||
horizontalRunnerAutoscalerMetrics = []prometheus.Collector{
|
||||
horizontalRunnerAutoscalerMinReplicas,
|
||||
horizontalRunnerAutoscalerMaxReplicas,
|
||||
horizontalRunnerAutoscalerDesiredReplicas,
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
horizontalRunnerAutoscalerMinReplicas = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "horizontalrunnerautoscaler_spec_min_replicas",
|
||||
Help: "minReplicas of HorizontalRunnerAutoscaler",
|
||||
},
|
||||
[]string{hraName, hraNamespace},
|
||||
)
|
||||
horizontalRunnerAutoscalerMaxReplicas = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "horizontalrunnerautoscaler_spec_max_replicas",
|
||||
Help: "maxReplicas of HorizontalRunnerAutoscaler",
|
||||
},
|
||||
[]string{hraName, hraNamespace},
|
||||
)
|
||||
horizontalRunnerAutoscalerDesiredReplicas = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "horizontalrunnerautoscaler_status_desired_replicas",
|
||||
Help: "desiredReplicas of HorizontalRunnerAutoscaler",
|
||||
},
|
||||
[]string{hraName, hraNamespace},
|
||||
)
|
||||
)
|
||||
|
||||
func SetHorizontalRunnerAutoscalerSpec(o metav1.ObjectMeta, spec v1alpha1.HorizontalRunnerAutoscalerSpec) {
|
||||
labels := prometheus.Labels{
|
||||
hraName: o.Name,
|
||||
hraNamespace: o.Namespace,
|
||||
}
|
||||
if spec.MaxReplicas != nil {
|
||||
horizontalRunnerAutoscalerMaxReplicas.With(labels).Set(float64(*spec.MaxReplicas))
|
||||
}
|
||||
if spec.MinReplicas != nil {
|
||||
horizontalRunnerAutoscalerMinReplicas.With(labels).Set(float64(*spec.MinReplicas))
|
||||
}
|
||||
}
|
||||
|
||||
func SetHorizontalRunnerAutoscalerStatus(o metav1.ObjectMeta, status v1alpha1.HorizontalRunnerAutoscalerStatus) {
|
||||
labels := prometheus.Labels{
|
||||
hraName: o.Name,
|
||||
hraNamespace: o.Namespace,
|
||||
}
|
||||
if status.DesiredReplicas != nil {
|
||||
horizontalRunnerAutoscalerDesiredReplicas.With(labels).Set(float64(*status.DesiredReplicas))
|
||||
}
|
||||
}
|
||||
14
controllers/metrics/metrics.go
Normal file
14
controllers/metrics/metrics.go
Normal file
@@ -0,0 +1,14 @@
|
||||
// Package metrics provides the metrics of custom resources such as HRA.
|
||||
//
|
||||
// This depends on the metrics exporter of kubebuilder.
|
||||
// See https://book.kubebuilder.io/reference/metrics.html for details.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
||||
)
|
||||
|
||||
func init() {
|
||||
metrics.Registry.MustRegister(runnerDeploymentMetrics...)
|
||||
metrics.Registry.MustRegister(horizontalRunnerAutoscalerMetrics...)
|
||||
}
|
||||
37
controllers/metrics/runnerdeployment.go
Normal file
37
controllers/metrics/runnerdeployment.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
)
|
||||
|
||||
const (
|
||||
rdName = "runnerdeployment"
|
||||
rdNamespace = "namespace"
|
||||
)
|
||||
|
||||
var (
|
||||
runnerDeploymentMetrics = []prometheus.Collector{
|
||||
runnerDeploymentReplicas,
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
runnerDeploymentReplicas = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "runnerdeployment_spec_replicas",
|
||||
Help: "replicas of RunnerDeployment",
|
||||
},
|
||||
[]string{rdName, rdNamespace},
|
||||
)
|
||||
)
|
||||
|
||||
func SetRunnerDeployment(rd v1alpha1.RunnerDeployment) {
|
||||
labels := prometheus.Labels{
|
||||
rdName: rd.Name,
|
||||
rdNamespace: rd.Namespace,
|
||||
}
|
||||
if rd.Spec.Replicas != nil {
|
||||
runnerDeploymentReplicas.With(labels).Set(float64(*rd.Spec.Replicas))
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"fmt"
|
||||
gogithub "github.com/google/go-github/v33/github"
|
||||
"github.com/summerwind/actions-runner-controller/hash"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -51,12 +52,15 @@ const (
|
||||
// RunnerReconciler reconciles a Runner object
|
||||
type RunnerReconciler struct {
|
||||
client.Client
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
GitHubClient *github.Client
|
||||
RunnerImage string
|
||||
DockerImage string
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
GitHubClient *github.Client
|
||||
RunnerImage string
|
||||
DockerImage string
|
||||
Name string
|
||||
RegistrationRecheckInterval time.Duration
|
||||
RegistrationRecheckJitter time.Duration
|
||||
}
|
||||
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
|
||||
@@ -129,8 +133,8 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
newRunner := runner.DeepCopy()
|
||||
newRunner.ObjectMeta.Finalizers = finalizers
|
||||
|
||||
if err := r.Update(ctx, newRunner); err != nil {
|
||||
log.Error(err, "Failed to update runner")
|
||||
if err := r.Patch(ctx, newRunner, client.MergeFrom(&runner)); err != nil {
|
||||
log.Error(err, "Failed to update runner for finalizer removal")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
@@ -159,31 +163,25 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
}
|
||||
|
||||
if err := r.Create(ctx, &newPod); err != nil {
|
||||
if kerrors.IsAlreadyExists(err) {
|
||||
// Gracefully handle pod-already-exists errors due to informer cache delay.
|
||||
// Without this we got a few errors like the below on new runner pod:
|
||||
// 2021-03-16T00:23:10.116Z ERROR controller-runtime.controller Reconciler error {"controller": "runner-controller", "request": "default/example-runnerdeploy-b2g2g-j4mcp", "error": "pods \"example-runnerdeploy-b2g2g-j4mcp\" already exists"}
|
||||
log.Info(
|
||||
"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconcilation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
|
||||
)
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
log.Error(err, "Failed to create pod resource")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodCreated", fmt.Sprintf("Created pod '%s'", newPod.Name))
|
||||
log.Info("Created runner pod", "repository", runner.Spec.Repository)
|
||||
} else {
|
||||
// If pod has ended up succeeded we need to restart it
|
||||
// Happens e.g. when dind is in runner and run completes
|
||||
restart := pod.Status.Phase == corev1.PodSucceeded
|
||||
|
||||
if !restart && runner.Status.Phase != string(pod.Status.Phase) {
|
||||
updated := runner.DeepCopy()
|
||||
updated.Status.Phase = string(pod.Status.Phase)
|
||||
updated.Status.Reason = pod.Status.Reason
|
||||
updated.Status.Message = pod.Status.Message
|
||||
|
||||
if err := r.Status().Update(ctx, updated); err != nil {
|
||||
log.Error(err, "Failed to update runner status")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
if !pod.ObjectMeta.DeletionTimestamp.IsZero() {
|
||||
deletionTimeout := 1 * time.Minute
|
||||
currentTime := time.Now()
|
||||
@@ -191,7 +189,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
|
||||
if deletionDidTimeout {
|
||||
log.Info(
|
||||
"Pod failed to delete itself in a timely manner. "+
|
||||
fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
|
||||
"This is typically the case when a Kubernetes node became unreachable "+
|
||||
"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
|
||||
"podDeletionTimestamp", pod.DeletionTimestamp,
|
||||
@@ -220,6 +218,10 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// If pod has ended up succeeded we need to restart it
|
||||
// Happens e.g. when dind is in runner and run completes
|
||||
restart := pod.Status.Phase == corev1.PodSucceeded
|
||||
|
||||
if pod.Status.Phase == corev1.PodRunning {
|
||||
for _, status := range pod.Status.ContainerStatuses {
|
||||
if status.Name != containerName {
|
||||
@@ -244,24 +246,61 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
var registrationRecheckDelay time.Duration
|
||||
|
||||
// all checks done below only decide whether a restart is needed
|
||||
// if a restart was already decided before, there is no need for the checks
|
||||
// saving API calls and scary log messages
|
||||
// saving API calls and scary log messages
|
||||
if !restart {
|
||||
registrationCheckInterval := time.Minute
|
||||
if r.RegistrationRecheckInterval > 0 {
|
||||
registrationCheckInterval = r.RegistrationRecheckInterval
|
||||
}
|
||||
|
||||
notRegistered := false
|
||||
// We want to call ListRunners GitHub Actions API only once per runner per minute.
|
||||
// This if block, in conjunction with:
|
||||
// return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
||||
// achieves that.
|
||||
if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
|
||||
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
|
||||
now := time.Now()
|
||||
|
||||
// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
|
||||
// so to avoid excessive, ineffective retries, we heuristically ignore the remaining delay in case it is
|
||||
// shorter than 1s
|
||||
requeueAfter := nextCheckTime.Sub(now) - time.Second
|
||||
if requeueAfter > 0 {
|
||||
log.Info(
|
||||
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
|
||||
"lastRegistrationCheckTime", lastCheckTime,
|
||||
"registrationCheckInterval", registrationCheckInterval,
|
||||
)
|
||||
|
||||
// Without RequeueAfter, the controller may not retry on schedule. Instead, it must wait until the
|
||||
// next sync period passes, which can be too much later than nextCheckTime.
|
||||
//
|
||||
// We need to requeue on this reconciliation even though we have already scheduled the initial
|
||||
// requeue previously with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil`.
|
||||
// Apparently, the workqueue used by controller-runtime seems to deduplicate and resets the delay on
|
||||
// other requeues- so the initial scheduled requeue may have been reset due to requeue on
|
||||
// spec/status change.
|
||||
return ctrl.Result{RequeueAfter: requeueAfter}, nil
|
||||
}
|
||||
}
|
||||
|
||||
notFound := false
|
||||
offline := false
|
||||
|
||||
runnerBusy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
||||
|
||||
currentTime := time.Now()
|
||||
|
||||
if err != nil {
|
||||
var notFoundException *github.RunnerNotFound
|
||||
var offlineException *github.RunnerOffline
|
||||
if errors.As(err, &notFoundException) {
|
||||
log.V(1).Info("Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time.", "runnerName", runner.Name)
|
||||
|
||||
notRegistered = true
|
||||
notFound = true
|
||||
} else if errors.As(err, &offlineException) {
|
||||
log.V(1).Info("GitHub runner appears to be offline, waiting for runner to get online ...", "runnerName", runner.Name)
|
||||
offline = true
|
||||
} else {
|
||||
var e *gogithub.RateLimitError
|
||||
@@ -293,40 +332,96 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
|
||||
}
|
||||
|
||||
registrationTimeout := 10 * time.Minute
|
||||
currentTime := time.Now()
|
||||
registrationDidTimeout := currentTime.Sub(pod.CreationTimestamp.Add(registrationTimeout)) > 0
|
||||
durationAfterRegistrationTimeout := currentTime.Sub(pod.CreationTimestamp.Add(registrationTimeout))
|
||||
registrationDidTimeout := durationAfterRegistrationTimeout > 0
|
||||
|
||||
if notRegistered && registrationDidTimeout {
|
||||
log.Info(
|
||||
"Runner failed to register itself to GitHub in timely manner. "+
|
||||
"Recreating the pod to see if it resolves the issue. "+
|
||||
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
||||
"See https://github.com/summerwind/actions-runner-controller/issues/288",
|
||||
"podCreationTimestamp", pod.CreationTimestamp,
|
||||
"currentTime", currentTime,
|
||||
"configuredRegistrationTimeout", registrationTimeout,
|
||||
)
|
||||
if notFound {
|
||||
if registrationDidTimeout {
|
||||
log.Info(
|
||||
"Runner failed to register itself to GitHub in timely manner. "+
|
||||
"Recreating the pod to see if it resolves the issue. "+
|
||||
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
||||
"See https://github.com/summerwind/actions-runner-controller/issues/288",
|
||||
"podCreationTimestamp", pod.CreationTimestamp,
|
||||
"currentTime", currentTime,
|
||||
"configuredRegistrationTimeout", registrationTimeout,
|
||||
)
|
||||
|
||||
restart = true
|
||||
restart = true
|
||||
} else {
|
||||
log.V(1).Info(
|
||||
"Runner pod exists but we failed to check if runner is busy. Apparently it still needs more time.",
|
||||
"runnerName", runner.Name,
|
||||
)
|
||||
}
|
||||
} else if offline {
|
||||
if registrationDidTimeout {
|
||||
log.Info(
|
||||
"Already existing GitHub runner still appears offline . "+
|
||||
"Recreating the pod to see if it resolves the issue. "+
|
||||
"CAUTION: If you see this a lot, you should investigate the root cause. ",
|
||||
"podCreationTimestamp", pod.CreationTimestamp,
|
||||
"currentTime", currentTime,
|
||||
"configuredRegistrationTimeout", registrationTimeout,
|
||||
)
|
||||
|
||||
restart = true
|
||||
} else {
|
||||
log.V(1).Info(
|
||||
"Runner pod exists but the GitHub runner appears to be still offline. Waiting for runner to get online ...",
|
||||
"runnerName", runner.Name,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if offline && registrationDidTimeout {
|
||||
log.Info(
|
||||
"Already existing GitHub runner still appears offline . "+
|
||||
"Recreating the pod to see if it resolves the issue. "+
|
||||
"CAUTION: If you see this a lot, you should investigate the root cause. ",
|
||||
"podCreationTimestamp", pod.CreationTimestamp,
|
||||
"currentTime", currentTime,
|
||||
"configuredRegistrationTimeout", registrationTimeout,
|
||||
)
|
||||
if (notFound || offline) && !registrationDidTimeout {
|
||||
registrationRecheckJitter := 10 * time.Second
|
||||
if r.RegistrationRecheckJitter > 0 {
|
||||
registrationRecheckJitter = r.RegistrationRecheckJitter
|
||||
}
|
||||
|
||||
restart = true
|
||||
registrationRecheckDelay = registrationCheckInterval + wait.Jitter(registrationRecheckJitter, 0.1)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Don't do anything if there's no need to restart the runner
|
||||
if !restart {
|
||||
// This guard enables us to update runner.Status.Phase to `Running` only after
|
||||
// the runner is registered to GitHub.
|
||||
if registrationRecheckDelay > 0 {
|
||||
log.V(1).Info(fmt.Sprintf("Rechecking the runner registration in %s", registrationRecheckDelay))
|
||||
|
||||
updated := runner.DeepCopy()
|
||||
updated.Status.LastRegistrationCheckTime = &metav1.Time{Time: time.Now()}
|
||||
|
||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
||||
log.Error(err, "Failed to update runner status for LastRegistrationCheckTime")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
||||
}
|
||||
|
||||
if runner.Status.Phase != string(pod.Status.Phase) {
|
||||
if pod.Status.Phase == corev1.PodRunning {
|
||||
// Seeing this message, you can expect the runner to become `Running` soon.
|
||||
log.Info(
|
||||
"Runner appears to have registered and running.",
|
||||
"podCreationTimestamp", pod.CreationTimestamp,
|
||||
)
|
||||
}
|
||||
|
||||
updated := runner.DeepCopy()
|
||||
updated.Status.Phase = string(pod.Status.Phase)
|
||||
updated.Status.Reason = pod.Status.Reason
|
||||
updated.Status.Message = pod.Status.Message
|
||||
|
||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
||||
log.Error(err, "Failed to update runner status for Phase/Reason/Message")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
}
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
@@ -394,8 +489,8 @@ func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v
|
||||
ExpiresAt: metav1.NewTime(rt.GetExpiresAt().Time),
|
||||
}
|
||||
|
||||
if err := r.Status().Update(ctx, updated); err != nil {
|
||||
log.Error(err, "Failed to update runner status")
|
||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
||||
log.Error(err, "Failed to update runner status for Registration")
|
||||
return false, err
|
||||
}
|
||||
|
||||
@@ -699,6 +794,9 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
||||
|
||||
func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
name := "runner-controller"
|
||||
if r.Name != "" {
|
||||
name = r.Name
|
||||
}
|
||||
|
||||
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
"github.com/summerwind/actions-runner-controller/controllers/metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -77,6 +78,8 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
metrics.SetRunnerDeployment(rd)
|
||||
|
||||
var myRunnerReplicaSetList v1alpha1.RunnerReplicaSetList
|
||||
if err := r.List(ctx, &myRunnerReplicaSetList, client.InNamespace(req.Namespace), client.MatchingFields{runnerSetOwnerKey: req.Name}); err != nil {
|
||||
return ctrl.Result{}, err
|
||||
@@ -189,14 +192,28 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
if len(oldSets) > 0 {
|
||||
readyReplicas := newestSet.Status.ReadyReplicas
|
||||
|
||||
if readyReplicas < currentDesiredReplicas {
|
||||
log.WithValues("runnerreplicaset", types.NamespacedName{
|
||||
oldSetsCount := len(oldSets)
|
||||
|
||||
logWithDebugInfo := log.WithValues(
|
||||
"newest_runnerreplicaset", types.NamespacedName{
|
||||
Namespace: newestSet.Namespace,
|
||||
Name: newestSet.Name,
|
||||
}).
|
||||
Info("Waiting until the newest runner replica set to be 100% available")
|
||||
},
|
||||
"newest_runnerreplicaset_replicas_ready", readyReplicas,
|
||||
"newest_runnerreplicaset_replicas_desired", currentDesiredReplicas,
|
||||
"old_runnerreplicasets_count", oldSetsCount,
|
||||
)
|
||||
|
||||
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
|
||||
if readyReplicas < currentDesiredReplicas {
|
||||
logWithDebugInfo.
|
||||
Info("Waiting until the newest runnerreplicaset to be 100% available")
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
if oldSetsCount > 0 {
|
||||
logWithDebugInfo.
|
||||
Info("The newest runnerreplicaset is 100% available. Deleting old runnerreplicasets")
|
||||
}
|
||||
|
||||
for i := range oldSets {
|
||||
|
||||
@@ -139,7 +139,9 @@ func SetupDeploymentTest(ctx context.Context) *corev1.Namespace {
|
||||
err := k8sClient.Create(ctx, ns)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create test namespace")
|
||||
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
|
||||
Namespace: ns.Name,
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create manager")
|
||||
|
||||
controller := &RunnerDeploymentReconciler{
|
||||
@@ -199,7 +201,7 @@ var _ = Context("Inside of a new namespace", func() {
|
||||
},
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "foo/bar",
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
@@ -295,7 +297,7 @@ var _ = Context("Inside of a new namespace", func() {
|
||||
Replicas: intPtr(1),
|
||||
Template: actionsv1alpha1.RunnerTemplate{
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "foo/bar",
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
@@ -391,7 +393,7 @@ var _ = Context("Inside of a new namespace", func() {
|
||||
Replicas: intPtr(1),
|
||||
Template: actionsv1alpha1.RunnerTemplate{
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "foo/bar",
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
|
||||
@@ -114,13 +114,14 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
desired = 1
|
||||
}
|
||||
|
||||
log.V(0).Info("debug", "desired", desired, "available", available)
|
||||
|
||||
if available > desired {
|
||||
n := available - desired
|
||||
|
||||
// get runners that are currently not busy
|
||||
var notBusy []v1alpha1.Runner
|
||||
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "available", available, "ready", ready)
|
||||
|
||||
// get runners that are currently offline/not busy/timed-out to register
|
||||
var deletionCandidates []v1alpha1.Runner
|
||||
|
||||
for _, runner := range allRunners.Items {
|
||||
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
||||
if err != nil {
|
||||
@@ -168,35 +169,37 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
|
||||
"configuredRegistrationTimeout", registrationTimeout,
|
||||
)
|
||||
|
||||
notBusy = append(notBusy, runner)
|
||||
deletionCandidates = append(deletionCandidates, runner)
|
||||
}
|
||||
|
||||
// offline runners should always be a great target for scale down
|
||||
if offline {
|
||||
notBusy = append(notBusy, runner)
|
||||
deletionCandidates = append(deletionCandidates, runner)
|
||||
}
|
||||
} else if !busy {
|
||||
notBusy = append(notBusy, runner)
|
||||
deletionCandidates = append(deletionCandidates, runner)
|
||||
}
|
||||
}
|
||||
|
||||
if len(notBusy) < n {
|
||||
n = len(notBusy)
|
||||
if len(deletionCandidates) < n {
|
||||
n = len(deletionCandidates)
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
if err := r.Client.Delete(ctx, ¬Busy[i]); client.IgnoreNotFound(err) != nil {
|
||||
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
|
||||
log.Error(err, "Failed to delete runner resource")
|
||||
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", notBusy[i].Name))
|
||||
log.Info("Deleted runner", "runnerreplicaset", rs.ObjectMeta.Name)
|
||||
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
|
||||
log.Info("Deleted runner")
|
||||
}
|
||||
} else if desired > available {
|
||||
n := desired - available
|
||||
|
||||
log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", available, "ready", ready)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
newRunner, err := r.newRunner(rs)
|
||||
if err != nil {
|
||||
|
||||
@@ -47,7 +47,9 @@ func SetupTest(ctx context.Context) *corev1.Namespace {
|
||||
err := k8sClient.Create(ctx, ns)
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create test namespace")
|
||||
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
|
||||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
|
||||
Namespace: ns.Name,
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred(), "failed to create manager")
|
||||
|
||||
runnersList = fake.NewRunnersList()
|
||||
@@ -127,7 +129,7 @@ var _ = Context("Inside of a new namespace", func() {
|
||||
},
|
||||
},
|
||||
Spec: actionsv1alpha1.RunnerSpec{
|
||||
Repository: "foo/bar",
|
||||
Repository: "test/valid",
|
||||
Image: "bar",
|
||||
Env: []corev1.EnvVar{
|
||||
{Name: "FOO", Value: "FOOVALUE"},
|
||||
|
||||
@@ -55,9 +55,17 @@ func TestAPIs(t *testing.T) {
|
||||
var _ = BeforeSuite(func(done Done) {
|
||||
logf.SetLogger(zap.LoggerTo(GinkgoWriter, true))
|
||||
|
||||
var apiServerFlags []string
|
||||
|
||||
apiServerFlags = append(apiServerFlags, envtest.DefaultKubeAPIServerFlags...)
|
||||
// Avoids the following error:
|
||||
// 2021-03-19T15:14:11.673+0900 ERROR controller-runtime.controller Reconciler error {"controller": "testns-tvjzjrunner", "request": "testns-gdnyx/example-runnerdeploy-zps4z-j5562", "error": "Pod \"example-runnerdeploy-zps4z-j5562\" is invalid: [spec.containers[1].image: Required value, spec.containers[1].securityContext.privileged: Forbidden: disallowed by cluster policy]"}
|
||||
apiServerFlags = append(apiServerFlags, "--allow-privileged=true")
|
||||
|
||||
By("bootstrapping test environment")
|
||||
testEnv = &envtest.Environment{
|
||||
CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
|
||||
CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
|
||||
KubeAPIServerFlags: apiServerFlags,
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
@@ -158,7 +158,7 @@ func (c *Client) ListRunners(ctx context.Context, enterprise, org, repo string)
|
||||
|
||||
var runners []*github.Runner
|
||||
|
||||
opts := github.ListOptions{PerPage: 10}
|
||||
opts := github.ListOptions{PerPage: 100}
|
||||
for {
|
||||
list, res, err := c.listRunners(ctx, enterprise, owner, repo, &opts)
|
||||
|
||||
|
||||
32
main.go
32
main.go
@@ -41,8 +41,8 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
scheme = runtime.NewScheme()
|
||||
setupLog = ctrl.Log.WithName("setup")
|
||||
scheme = runtime.NewScheme()
|
||||
log = ctrl.Log.WithName("actions-runner-controller")
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -109,13 +109,13 @@ func main() {
|
||||
Namespace: namespace,
|
||||
})
|
||||
if err != nil {
|
||||
setupLog.Error(err, "unable to start manager")
|
||||
log.Error(err, "unable to start manager")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
runnerReconciler := &controllers.RunnerReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Log: ctrl.Log.WithName("controllers").WithName("Runner"),
|
||||
Log: log.WithName("runner"),
|
||||
Scheme: mgr.GetScheme(),
|
||||
GitHubClient: ghClient,
|
||||
RunnerImage: runnerImage,
|
||||
@@ -123,64 +123,64 @@ func main() {
|
||||
}
|
||||
|
||||
if err = runnerReconciler.SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "Runner")
|
||||
log.Error(err, "unable to create controller", "controller", "Runner")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
runnerSetReconciler := &controllers.RunnerReplicaSetReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Log: ctrl.Log.WithName("controllers").WithName("RunnerReplicaSet"),
|
||||
Log: log.WithName("runnerreplicaset"),
|
||||
Scheme: mgr.GetScheme(),
|
||||
GitHubClient: ghClient,
|
||||
}
|
||||
|
||||
if err = runnerSetReconciler.SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
|
||||
log.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
runnerDeploymentReconciler := &controllers.RunnerDeploymentReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"),
|
||||
Log: log.WithName("runnerdeployment"),
|
||||
Scheme: mgr.GetScheme(),
|
||||
CommonRunnerLabels: commonRunnerLabels,
|
||||
}
|
||||
|
||||
if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment")
|
||||
log.Error(err, "unable to create controller", "controller", "RunnerDeployment")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
horizontalRunnerAutoscaler := &controllers.HorizontalRunnerAutoscalerReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Log: ctrl.Log.WithName("controllers").WithName("HorizontalRunnerAutoscaler"),
|
||||
Log: log.WithName("horizontalrunnerautoscaler"),
|
||||
Scheme: mgr.GetScheme(),
|
||||
GitHubClient: ghClient,
|
||||
CacheDuration: syncPeriod - 10*time.Second,
|
||||
}
|
||||
|
||||
if err = horizontalRunnerAutoscaler.SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
|
||||
log.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err = (&actionsv1alpha1.Runner{}).SetupWebhookWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create webhook", "webhook", "Runner")
|
||||
log.Error(err, "unable to create webhook", "webhook", "Runner")
|
||||
os.Exit(1)
|
||||
}
|
||||
if err = (&actionsv1alpha1.RunnerDeployment{}).SetupWebhookWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
|
||||
log.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
|
||||
os.Exit(1)
|
||||
}
|
||||
if err = (&actionsv1alpha1.RunnerReplicaSet{}).SetupWebhookWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
|
||||
log.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
|
||||
os.Exit(1)
|
||||
}
|
||||
// +kubebuilder:scaffold:builder
|
||||
|
||||
setupLog.Info("starting manager")
|
||||
log.Info("starting manager")
|
||||
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
|
||||
setupLog.Error(err, "problem running manager")
|
||||
log.Error(err, "problem running manager")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ func Match(pat string, s string) bool {
|
||||
|
||||
s = subs[1]
|
||||
|
||||
wildcardInHead = false
|
||||
wildcardInHead = wildcardInTail
|
||||
}
|
||||
|
||||
r := s == ""
|
||||
|
||||
@@ -195,4 +195,20 @@ func TestMatch(t *testing.T) {
|
||||
Want: false,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("actions-*-metrics == actions-workflow-metrics", func(t *testing.T) {
|
||||
run(t, testcase{
|
||||
Pattern: "actions-*-metrics",
|
||||
Target: "actions-workflow-metrics",
|
||||
Want: true,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("!actions-*-metrics == actions-workflow-metrics", func(t *testing.T) {
|
||||
run(t, testcase{
|
||||
Pattern: "!actions-*-metrics",
|
||||
Target: "actions-workflow-metrics",
|
||||
Want: false,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user