mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-10 19:50:30 +00:00
Provide scale-set listener metrics (#2559)
Co-authored-by: Tingluo Huang <tingluohuang@github.com> Co-authored-by: Bassem Dghaidi <568794+Link-@users.noreply.github.com>
This commit is contained in:
@@ -33,6 +33,8 @@ import (
|
||||
"sigs.k8s.io/controller-runtime/pkg/source"
|
||||
|
||||
v1alpha1 "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
|
||||
"github.com/actions/actions-runner-controller/github/actions"
|
||||
hash "github.com/actions/actions-runner-controller/hash"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
@@ -49,6 +51,10 @@ type AutoscalingListenerReconciler struct {
|
||||
client.Client
|
||||
Log logr.Logger
|
||||
Scheme *runtime.Scheme
|
||||
// ListenerMetricsAddr is the address that the metrics endpoint binds to.
|
||||
// If it is set to "0", the metrics server is not started.
|
||||
ListenerMetricsAddr string
|
||||
ListenerMetricsEndpoint string
|
||||
|
||||
resourceBuilder resourceBuilder
|
||||
}
|
||||
@@ -227,6 +233,11 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
if err := r.publishRunningListener(autoscalingListener, false); err != nil {
|
||||
// If publishing fails, the URL is invalid, which means the listener pod would never be able to start
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
// Create a listener pod in the controller namespace
|
||||
log.Info("Creating a listener pod")
|
||||
return r.createListenerPod(ctx, &autoscalingRunnerSet, autoscalingListener, serviceAccount, mirrorSecret, log)
|
||||
@@ -242,6 +253,16 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
|
||||
}
|
||||
}
|
||||
|
||||
if listenerPod.Status.Phase == corev1.PodRunning {
|
||||
if err := r.publishRunningListener(autoscalingListener, true); err != nil {
|
||||
log.Error(err, "Unable to publish running listener", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
|
||||
// stop reconciling. We should never get to this point but if we do,
|
||||
// listener won't be able to start up, and the crash from the pod should
|
||||
// notify the reconciler again.
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
}
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
@@ -260,6 +281,9 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au
|
||||
return false, nil
|
||||
case err != nil && !kerrors.IsNotFound(err):
|
||||
return false, fmt.Errorf("failed to get listener pods: %v", err)
|
||||
|
||||
default: // NOT FOUND
|
||||
_ = r.publishRunningListener(autoscalingListener, false) // If error is returned, we never published metrics so it is safe to ignore
|
||||
}
|
||||
logger.Info("Listener pod is deleted")
|
||||
|
||||
@@ -371,9 +395,22 @@ func (r *AutoscalingListenerReconciler) createListenerPod(ctx context.Context, a
|
||||
envs = append(envs, env)
|
||||
}
|
||||
|
||||
newPod := r.resourceBuilder.newScaleSetListenerPod(autoscalingListener, serviceAccount, secret, envs...)
|
||||
var metricsConfig *listenerMetricsServerConfig
|
||||
if r.ListenerMetricsAddr != "0" {
|
||||
metricsConfig = &listenerMetricsServerConfig{
|
||||
addr: r.ListenerMetricsAddr,
|
||||
endpoint: r.ListenerMetricsEndpoint,
|
||||
}
|
||||
}
|
||||
|
||||
newPod, err := r.resourceBuilder.newScaleSetListenerPod(autoscalingListener, serviceAccount, secret, metricsConfig, envs...)
|
||||
if err != nil {
|
||||
logger.Error(err, "Failed to build listener pod")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
if err := ctrl.SetControllerReference(autoscalingListener, newPod, r.Scheme); err != nil {
|
||||
logger.Error(err, "Failed to set controller reference")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
@@ -556,6 +593,30 @@ func (r *AutoscalingListenerReconciler) createRoleBindingForListener(ctx context
|
||||
return ctrl.Result{Requeue: true}, nil
|
||||
}
|
||||
|
||||
func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListener *v1alpha1.AutoscalingListener, isUp bool) error {
|
||||
githubConfigURL := autoscalingListener.Spec.GitHubConfigUrl
|
||||
parsedURL, err := actions.ParseGitHubConfigFromURL(githubConfigURL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
commonLabels := metrics.CommonLabels{
|
||||
Name: autoscalingListener.Name,
|
||||
Namespace: autoscalingListener.Namespace,
|
||||
Repository: parsedURL.Repository,
|
||||
Organization: parsedURL.Organization,
|
||||
Enterprise: parsedURL.Enterprise,
|
||||
}
|
||||
|
||||
if isUp {
|
||||
metrics.AddRunningListener(commonLabels)
|
||||
} else {
|
||||
metrics.SubRunningListener(commonLabels)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
groupVersionIndexer := func(rawObj client.Object) []string {
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
|
||||
"github.com/actions/actions-runner-controller/github/actions"
|
||||
"github.com/go-logr/logr"
|
||||
"go.uber.org/multierr"
|
||||
@@ -50,6 +51,8 @@ type EphemeralRunnerSetReconciler struct {
|
||||
Scheme *runtime.Scheme
|
||||
ActionsClient actions.MultiClient
|
||||
|
||||
PublishMetrics bool
|
||||
|
||||
resourceBuilder resourceBuilder
|
||||
}
|
||||
|
||||
@@ -163,6 +166,29 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
|
||||
"deleting", len(deletingEphemeralRunners),
|
||||
)
|
||||
|
||||
if r.PublishMetrics {
|
||||
githubConfigURL := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.GitHubConfigUrl
|
||||
parsedURL, err := actions.ParseGitHubConfigFromURL(githubConfigURL)
|
||||
if err != nil {
|
||||
log.Error(err, "Github Config URL is invalid", "URL", githubConfigURL)
|
||||
// stop reconciling on this object
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
metrics.SetEphemeralRunnerCountsByStatus(
|
||||
metrics.CommonLabels{
|
||||
Name: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetName],
|
||||
Namespace: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetNamespace],
|
||||
Repository: parsedURL.Repository,
|
||||
Organization: parsedURL.Organization,
|
||||
Enterprise: parsedURL.Enterprise,
|
||||
},
|
||||
len(pendingEphemeralRunners),
|
||||
len(runningEphemeralRunners),
|
||||
len(failedEphemeralRunners),
|
||||
)
|
||||
}
|
||||
|
||||
// cleanup finished runners and proceed
|
||||
var errs []error
|
||||
for i := range finishedEphemeralRunners {
|
||||
|
||||
92
controllers/actions.github.com/metrics/metrics.go
Normal file
92
controllers/actions.github.com/metrics/metrics.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
||||
)
|
||||
|
||||
var (
	// githubScaleSetControllerSubsystem is the Prometheus subsystem set on
	// the gauges declared in this file.
	githubScaleSetControllerSubsystem = "gha_controller"

	// labels lists the label names attached to the gauge vectors declared in
	// this file. The keys must match the ones produced by CommonLabels.labels.
	labels = []string{
		"name",
		"namespace",
		"repository",
		"organization",
		"enterprise",
	}
)
|
||||
|
||||
type CommonLabels struct {
|
||||
Name string
|
||||
Namespace string
|
||||
Repository string
|
||||
Organization string
|
||||
Enterprise string
|
||||
}
|
||||
|
||||
func (l *CommonLabels) labels() prometheus.Labels {
|
||||
return prometheus.Labels{
|
||||
"name": l.Name,
|
||||
"namespace": l.Namespace,
|
||||
"repository": l.Repository,
|
||||
"organization": l.Organization,
|
||||
"enterprise": l.Enterprise,
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
pendingEphemeralRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetControllerSubsystem,
|
||||
Name: "pending_ephemeral_runners",
|
||||
Help: "Number of ephemeral runners in a pending state.",
|
||||
},
|
||||
labels,
|
||||
)
|
||||
runningEphemeralRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetControllerSubsystem,
|
||||
Name: "running_ephemeral_runners",
|
||||
Help: "Number of ephemeral runners in a running state.",
|
||||
},
|
||||
labels,
|
||||
)
|
||||
failedEphemeralRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetControllerSubsystem,
|
||||
Name: "failed_ephemeral_runners",
|
||||
Help: "Number of ephemeral runners in a failed state.",
|
||||
},
|
||||
labels,
|
||||
)
|
||||
runningListeners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetControllerSubsystem,
|
||||
Name: "running_listeners",
|
||||
Help: "Number of listeners in a running state.",
|
||||
},
|
||||
labels,
|
||||
)
|
||||
)
|
||||
|
||||
func RegisterMetrics() {
|
||||
metrics.Registry.MustRegister(
|
||||
pendingEphemeralRunners,
|
||||
runningEphemeralRunners,
|
||||
failedEphemeralRunners,
|
||||
runningListeners,
|
||||
)
|
||||
}
|
||||
|
||||
func SetEphemeralRunnerCountsByStatus(commonLabels CommonLabels, pending, running, failed int) {
|
||||
pendingEphemeralRunners.With(commonLabels.labels()).Set(float64(pending))
|
||||
runningEphemeralRunners.With(commonLabels.labels()).Set(float64(running))
|
||||
failedEphemeralRunners.With(commonLabels.labels()).Set(float64(failed))
|
||||
}
|
||||
|
||||
func AddRunningListener(commonLabels CommonLabels) {
|
||||
runningListeners.With(commonLabels.labels()).Set(1)
|
||||
}
|
||||
|
||||
func SubRunningListener(commonLabels CommonLabels) {
|
||||
runningListeners.With(commonLabels.labels()).Set(0)
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"net"
|
||||
"strconv"
|
||||
|
||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
@@ -124,7 +125,12 @@ func (b *resourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
|
||||
return autoscalingListener, nil
|
||||
}
|
||||
|
||||
func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, secret *corev1.Secret, envs ...corev1.EnvVar) *corev1.Pod {
|
||||
// listenerMetricsServerConfig carries the metrics server settings handed to a
// listener pod.
//
// addr is passed to the listener container as GITHUB_METRICS_ADDR and is also
// split as host:port to derive the container's "metrics" port; endpoint is
// passed as GITHUB_METRICS_ENDPOINT.
type listenerMetricsServerConfig struct {
	addr, endpoint string
}
|
||||
|
||||
func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, secret *corev1.Secret, metricsConfig *listenerMetricsServerConfig, envs ...corev1.EnvVar) (*corev1.Pod, error) {
|
||||
listenerEnv := []corev1.EnvVar{
|
||||
{
|
||||
Name: "GITHUB_CONFIGURE_URL",
|
||||
@@ -150,6 +156,10 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
|
||||
Name: "GITHUB_RUNNER_SCALE_SET_ID",
|
||||
Value: strconv.Itoa(autoscalingListener.Spec.RunnerScaleSetId),
|
||||
},
|
||||
{
|
||||
Name: "GITHUB_RUNNER_SCALE_SET_NAME",
|
||||
Value: autoscalingListener.Spec.AutoscalingRunnerSetName,
|
||||
},
|
||||
{
|
||||
Name: "GITHUB_RUNNER_LOG_LEVEL",
|
||||
Value: scaleSetListenerLogLevel,
|
||||
@@ -217,6 +227,38 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
|
||||
})
|
||||
}
|
||||
|
||||
var ports []corev1.ContainerPort
|
||||
if metricsConfig != nil && len(metricsConfig.addr) != 0 {
|
||||
listenerEnv = append(
|
||||
listenerEnv,
|
||||
corev1.EnvVar{
|
||||
Name: "GITHUB_METRICS_ADDR",
|
||||
Value: metricsConfig.addr,
|
||||
},
|
||||
corev1.EnvVar{
|
||||
Name: "GITHUB_METRICS_ENDPOINT",
|
||||
Value: metricsConfig.endpoint,
|
||||
},
|
||||
)
|
||||
|
||||
_, portStr, err := net.SplitHostPort(metricsConfig.addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to split host:port for metrics address: %v", err)
|
||||
}
|
||||
port, err := strconv.ParseInt(portStr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert port %q to int32: %v", portStr, err)
|
||||
}
|
||||
ports = append(
|
||||
ports,
|
||||
corev1.ContainerPort{
|
||||
ContainerPort: int32(port),
|
||||
Protocol: corev1.ProtocolTCP,
|
||||
Name: "metrics",
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
podSpec := corev1.PodSpec{
|
||||
ServiceAccountName: serviceAccount.Name,
|
||||
Containers: []corev1.Container{
|
||||
@@ -228,6 +270,7 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
|
||||
Command: []string{
|
||||
"/github-runnerscaleset-listener",
|
||||
},
|
||||
Ports: ports,
|
||||
},
|
||||
},
|
||||
ImagePullSecrets: autoscalingListener.Spec.ImagePullSecrets,
|
||||
@@ -252,7 +295,7 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
|
||||
Spec: podSpec,
|
||||
}
|
||||
|
||||
return newRunnerScaleSetListenerPod
|
||||
return newRunnerScaleSetListenerPod, nil
|
||||
}
|
||||
|
||||
func (b *resourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener *v1alpha1.AutoscalingListener) *corev1.ServiceAccount {
|
||||
|
||||
@@ -68,7 +68,8 @@ func TestLabelPropagation(t *testing.T) {
|
||||
Name: "test",
|
||||
},
|
||||
}
|
||||
listenerPod := b.newScaleSetListenerPod(listener, listenerServiceAccount, listenerSecret)
|
||||
listenerPod, err := b.newScaleSetListenerPod(listener, listenerServiceAccount, listenerSecret, nil)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, listenerPod.Labels, listener.Labels)
|
||||
|
||||
ephemeralRunner := b.newEphemeralRunner(ephemeralRunnerSet)
|
||||
|
||||
Reference in New Issue
Block a user