Provide scale-set listener metrics (#2559)

Co-authored-by: Tingluo Huang <tingluohuang@github.com>
Co-authored-by: Bassem Dghaidi <568794+Link-@users.noreply.github.com>
This commit is contained in:
Nikola Jokic
2023-08-21 13:50:07 +02:00
committed by GitHub
parent 1c360d7e26
commit a0a3916c80
20 changed files with 975 additions and 427 deletions

View File

@@ -33,6 +33,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"
v1alpha1 "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
"github.com/actions/actions-runner-controller/github/actions"
hash "github.com/actions/actions-runner-controller/hash"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
@@ -49,6 +51,10 @@ type AutoscalingListenerReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
// ListenerMetricsAddr is the address that the metrics endpoint binds to.
// If it is set to "0", the metrics server is not started.
ListenerMetricsAddr string
ListenerMetricsEndpoint string
resourceBuilder resourceBuilder
}
@@ -227,6 +233,11 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
return ctrl.Result{}, err
}
if err := r.publishRunningListener(autoscalingListener, false); err != nil {
// If publish fails, URL is incorrect which means the listener pod would never be able to start
return ctrl.Result{}, nil
}
// Create a listener pod in the controller namespace
log.Info("Creating a listener pod")
return r.createListenerPod(ctx, &autoscalingRunnerSet, autoscalingListener, serviceAccount, mirrorSecret, log)
@@ -242,6 +253,16 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
}
}
if listenerPod.Status.Phase == corev1.PodRunning {
if err := r.publishRunningListener(autoscalingListener, true); err != nil {
log.Error(err, "Unable to publish running listener", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
// stop reconciling. We should never get to this point but if we do,
// listener won't be able to start up, and the crash from the pod should
// notify the reconciler again.
return ctrl.Result{}, nil
}
}
return ctrl.Result{}, nil
}
@@ -260,6 +281,9 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au
return false, nil
case err != nil && !kerrors.IsNotFound(err):
return false, fmt.Errorf("failed to get listener pods: %v", err)
default: // NOT FOUND
_ = r.publishRunningListener(autoscalingListener, false) // If error is returned, we never published metrics so it is safe to ignore
}
logger.Info("Listener pod is deleted")
@@ -371,9 +395,22 @@ func (r *AutoscalingListenerReconciler) createListenerPod(ctx context.Context, a
envs = append(envs, env)
}
newPod := r.resourceBuilder.newScaleSetListenerPod(autoscalingListener, serviceAccount, secret, envs...)
var metricsConfig *listenerMetricsServerConfig
if r.ListenerMetricsAddr != "0" {
metricsConfig = &listenerMetricsServerConfig{
addr: r.ListenerMetricsAddr,
endpoint: r.ListenerMetricsEndpoint,
}
}
newPod, err := r.resourceBuilder.newScaleSetListenerPod(autoscalingListener, serviceAccount, secret, metricsConfig, envs...)
if err != nil {
logger.Error(err, "Failed to build listener pod")
return ctrl.Result{}, err
}
if err := ctrl.SetControllerReference(autoscalingListener, newPod, r.Scheme); err != nil {
logger.Error(err, "Failed to set controller reference")
return ctrl.Result{}, err
}
@@ -556,6 +593,30 @@ func (r *AutoscalingListenerReconciler) createRoleBindingForListener(ctx context
return ctrl.Result{Requeue: true}, nil
}
// publishRunningListener updates the running-listeners metric for the given
// AutoscalingListener. The metric labels are derived from the listener's name,
// namespace and its GitHub config URL.
//
// When isUp is true the listener is reported through
// metrics.AddRunningListener, otherwise through metrics.SubRunningListener.
// An error is returned only when the GitHub config URL cannot be parsed; in
// that case no metric is touched.
func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListener *v1alpha1.AutoscalingListener, isUp bool) error {
	cfg, err := actions.ParseGitHubConfigFromURL(autoscalingListener.Spec.GitHubConfigUrl)
	if err != nil {
		return err
	}

	listenerLabels := metrics.CommonLabels{
		Name:         autoscalingListener.Name,
		Namespace:    autoscalingListener.Namespace,
		Repository:   cfg.Repository,
		Organization: cfg.Organization,
		Enterprise:   cfg.Enterprise,
	}

	// Pick the publisher matching the requested state, then invoke it once.
	publish := metrics.SubRunningListener
	if isUp {
		publish = metrics.AddRunningListener
	}
	publish(listenerLabels)

	return nil
}
// SetupWithManager sets up the controller with the Manager.
func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager) error {
groupVersionIndexer := func(rawObj client.Object) []string {

View File

@@ -25,6 +25,7 @@ import (
"strings"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
"github.com/actions/actions-runner-controller/github/actions"
"github.com/go-logr/logr"
"go.uber.org/multierr"
@@ -50,6 +51,8 @@ type EphemeralRunnerSetReconciler struct {
Scheme *runtime.Scheme
ActionsClient actions.MultiClient
PublishMetrics bool
resourceBuilder resourceBuilder
}
@@ -163,6 +166,29 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
"deleting", len(deletingEphemeralRunners),
)
if r.PublishMetrics {
githubConfigURL := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.GitHubConfigUrl
parsedURL, err := actions.ParseGitHubConfigFromURL(githubConfigURL)
if err != nil {
log.Error(err, "Github Config URL is invalid", "URL", githubConfigURL)
// stop reconciling on this object
return ctrl.Result{}, nil
}
metrics.SetEphemeralRunnerCountsByStatus(
metrics.CommonLabels{
Name: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetName],
Namespace: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetNamespace],
Repository: parsedURL.Repository,
Organization: parsedURL.Organization,
Enterprise: parsedURL.Enterprise,
},
len(pendingEphemeralRunners),
len(runningEphemeralRunners),
len(failedEphemeralRunners),
)
}
// cleanup finished runners and proceed
var errs []error
for i := range finishedEphemeralRunners {

View File

@@ -0,0 +1,92 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)
var (
	// githubScaleSetControllerSubsystem is the Prometheus subsystem under
	// which every metric of this package is declared.
	githubScaleSetControllerSubsystem = "gha_controller"

	// labels lists the label keys shared by all metrics of this package.
	// The keys correspond to the map built by (*CommonLabels).labels.
	labels = []string{
		"name",
		"namespace",
		"repository",
		"organization",
		"enterprise",
	}
)
// CommonLabels holds the label values attached to the metrics defined in this
// package. The labels method maps each field to the label key with the same
// lowercase name.
type CommonLabels struct {
// Name is emitted as the "name" label.
Name string
// Namespace is emitted as the "namespace" label.
Namespace string
// Repository is emitted as the "repository" label.
Repository string
// Organization is emitted as the "organization" label.
Organization string
// Enterprise is emitted as the "enterprise" label.
Enterprise string
}
// labels converts the receiver into a prometheus.Labels map with one entry
// per field, keyed by "name", "namespace", "repository", "organization" and
// "enterprise". A new map is built on every call.
func (l *CommonLabels) labels() prometheus.Labels {
	out := make(prometheus.Labels, 5)
	out["name"] = l.Name
	out["namespace"] = l.Namespace
	out["repository"] = l.Repository
	out["organization"] = l.Organization
	out["enterprise"] = l.Enterprise
	return out
}
// Gauge vectors exported by the controller. All of them live in the
// githubScaleSetControllerSubsystem subsystem and are partitioned by the
// package-level label keys.
var (
	// pendingEphemeralRunners tracks ephemeral runners in a pending state.
	pendingEphemeralRunners = newScaleSetGaugeVec(
		"pending_ephemeral_runners",
		"Number of ephemeral runners in a pending state.",
	)

	// runningEphemeralRunners tracks ephemeral runners in a running state.
	runningEphemeralRunners = newScaleSetGaugeVec(
		"running_ephemeral_runners",
		"Number of ephemeral runners in a running state.",
	)

	// failedEphemeralRunners tracks ephemeral runners in a failed state.
	failedEphemeralRunners = newScaleSetGaugeVec(
		"failed_ephemeral_runners",
		"Number of ephemeral runners in a failed state.",
	)

	// runningListeners tracks listeners in a running state.
	runningListeners = newScaleSetGaugeVec(
		"running_listeners",
		"Number of listeners in a running state.",
	)
)

// newScaleSetGaugeVec builds a gauge vector with the given name and help text
// in the controller subsystem, using the package-level label keys.
func newScaleSetGaugeVec(name, help string) *prometheus.GaugeVec {
	opts := prometheus.GaugeOpts{
		Subsystem: githubScaleSetControllerSubsystem,
		Name:      name,
		Help:      help,
	}
	return prometheus.NewGaugeVec(opts, labels)
}
func RegisterMetrics() {
metrics.Registry.MustRegister(
pendingEphemeralRunners,
runningEphemeralRunners,
failedEphemeralRunners,
runningListeners,
)
}
// SetEphemeralRunnerCountsByStatus sets the pending, running and failed
// ephemeral runner gauges for the series identified by commonLabels to the
// given counts. The gauges are updated in that order.
func SetEphemeralRunnerCountsByStatus(commonLabels CommonLabels, pending, running, failed int) {
	// Build the label set once and share it between the three gauges.
	labelSet := commonLabels.labels()

	updates := []struct {
		gauge *prometheus.GaugeVec
		count int
	}{
		{gauge: pendingEphemeralRunners, count: pending},
		{gauge: runningEphemeralRunners, count: running},
		{gauge: failedEphemeralRunners, count: failed},
	}
	for _, u := range updates {
		u.gauge.With(labelSet).Set(float64(u.count))
	}
}
// AddRunningListener marks the listener identified by commonLabels as
// running by setting its running-listeners gauge to 1. The value is set, not
// incremented, so repeated calls leave the gauge at 1.
func AddRunningListener(commonLabels CommonLabels) {
	gauge := runningListeners.With(commonLabels.labels())
	gauge.Set(1)
}
// SubRunningListener marks the listener identified by commonLabels as not
// running by setting its running-listeners gauge to 0. The value is set, not
// decremented, so repeated calls leave the gauge at 0.
func SubRunningListener(commonLabels CommonLabels) {
	gauge := runningListeners.With(commonLabels.labels())
	gauge.Set(0)
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"math"
"net"
"strconv"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
@@ -124,7 +125,12 @@ func (b *resourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
return autoscalingListener, nil
}
func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, secret *corev1.Secret, envs ...corev1.EnvVar) *corev1.Pod {
// listenerMetricsServerConfig carries the metrics server settings handed to
// newScaleSetListenerPod. A nil config, or one with an empty addr, results in
// no metrics environment variables or container port being added to the pod.
type listenerMetricsServerConfig struct {
// addr is exported to the listener as GITHUB_METRICS_ADDR; it is split as
// host:port and the port is exposed as the "metrics" container port.
addr string
// endpoint is exported to the listener as GITHUB_METRICS_ENDPOINT.
endpoint string
}
func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, secret *corev1.Secret, metricsConfig *listenerMetricsServerConfig, envs ...corev1.EnvVar) (*corev1.Pod, error) {
listenerEnv := []corev1.EnvVar{
{
Name: "GITHUB_CONFIGURE_URL",
@@ -150,6 +156,10 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
Name: "GITHUB_RUNNER_SCALE_SET_ID",
Value: strconv.Itoa(autoscalingListener.Spec.RunnerScaleSetId),
},
{
Name: "GITHUB_RUNNER_SCALE_SET_NAME",
Value: autoscalingListener.Spec.AutoscalingRunnerSetName,
},
{
Name: "GITHUB_RUNNER_LOG_LEVEL",
Value: scaleSetListenerLogLevel,
@@ -217,6 +227,38 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
})
}
var ports []corev1.ContainerPort
if metricsConfig != nil && len(metricsConfig.addr) != 0 {
listenerEnv = append(
listenerEnv,
corev1.EnvVar{
Name: "GITHUB_METRICS_ADDR",
Value: metricsConfig.addr,
},
corev1.EnvVar{
Name: "GITHUB_METRICS_ENDPOINT",
Value: metricsConfig.endpoint,
},
)
_, portStr, err := net.SplitHostPort(metricsConfig.addr)
if err != nil {
return nil, fmt.Errorf("failed to split host:port for metrics address: %v", err)
}
port, err := strconv.ParseInt(portStr, 10, 32)
if err != nil {
return nil, fmt.Errorf("failed to convert port %q to int32: %v", portStr, err)
}
ports = append(
ports,
corev1.ContainerPort{
ContainerPort: int32(port),
Protocol: corev1.ProtocolTCP,
Name: "metrics",
},
)
}
podSpec := corev1.PodSpec{
ServiceAccountName: serviceAccount.Name,
Containers: []corev1.Container{
@@ -228,6 +270,7 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
Command: []string{
"/github-runnerscaleset-listener",
},
Ports: ports,
},
},
ImagePullSecrets: autoscalingListener.Spec.ImagePullSecrets,
@@ -252,7 +295,7 @@ func (b *resourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
Spec: podSpec,
}
return newRunnerScaleSetListenerPod
return newRunnerScaleSetListenerPod, nil
}
func (b *resourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener *v1alpha1.AutoscalingListener) *corev1.ServiceAccount {

View File

@@ -68,7 +68,8 @@ func TestLabelPropagation(t *testing.T) {
Name: "test",
},
}
listenerPod := b.newScaleSetListenerPod(listener, listenerServiceAccount, listenerSecret)
listenerPod, err := b.newScaleSetListenerPod(listener, listenerServiceAccount, listenerSecret, nil)
require.NoError(t, err)
assert.Equal(t, listenerPod.Labels, listener.Labels)
ephemeralRunner := b.newEphemeralRunner(ephemeralRunnerSet)