mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-15 06:26:57 +00:00
Create configurable metrics (#3975)
This commit is contained in:
@@ -23,7 +23,7 @@ type App struct {
|
||||
// initialized fields
|
||||
listener Listener
|
||||
worker Worker
|
||||
metrics metrics.ServerPublisher
|
||||
metrics metrics.ServerExporter
|
||||
}
|
||||
|
||||
//go:generate mockery --name Listener --output ./mocks --outpkg mocks --case underscore
|
||||
@@ -69,6 +69,8 @@ func New(config config.Config) (*App, error) {
|
||||
Repository: ghConfig.Repository,
|
||||
ServerAddr: config.MetricsAddr,
|
||||
ServerEndpoint: config.MetricsEndpoint,
|
||||
Logger: app.logger.WithName("metrics exporter"),
|
||||
Metrics: *config.Metrics,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
|
||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
"github.com/actions/actions-runner-controller/build"
|
||||
"github.com/actions/actions-runner-controller/github/actions"
|
||||
"github.com/actions/actions-runner-controller/logging"
|
||||
@@ -16,22 +17,23 @@ import (
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
ConfigureUrl string `json:"configure_url"`
|
||||
AppID int64 `json:"app_id"`
|
||||
AppInstallationID int64 `json:"app_installation_id"`
|
||||
AppPrivateKey string `json:"app_private_key"`
|
||||
Token string `json:"token"`
|
||||
EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"`
|
||||
EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"`
|
||||
MaxRunners int `json:"max_runners"`
|
||||
MinRunners int `json:"min_runners"`
|
||||
RunnerScaleSetId int `json:"runner_scale_set_id"`
|
||||
RunnerScaleSetName string `json:"runner_scale_set_name"`
|
||||
ServerRootCA string `json:"server_root_ca"`
|
||||
LogLevel string `json:"log_level"`
|
||||
LogFormat string `json:"log_format"`
|
||||
MetricsAddr string `json:"metrics_addr"`
|
||||
MetricsEndpoint string `json:"metrics_endpoint"`
|
||||
ConfigureUrl string `json:"configure_url"`
|
||||
AppID int64 `json:"app_id"`
|
||||
AppInstallationID int64 `json:"app_installation_id"`
|
||||
AppPrivateKey string `json:"app_private_key"`
|
||||
Token string `json:"token"`
|
||||
EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"`
|
||||
EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"`
|
||||
MaxRunners int `json:"max_runners"`
|
||||
MinRunners int `json:"min_runners"`
|
||||
RunnerScaleSetId int `json:"runner_scale_set_id"`
|
||||
RunnerScaleSetName string `json:"runner_scale_set_name"`
|
||||
ServerRootCA string `json:"server_root_ca"`
|
||||
LogLevel string `json:"log_level"`
|
||||
LogFormat string `json:"log_format"`
|
||||
MetricsAddr string `json:"metrics_addr"`
|
||||
MetricsEndpoint string `json:"metrics_endpoint"`
|
||||
Metrics *v1alpha1.MetricsConfig `json:"metrics"`
|
||||
}
|
||||
|
||||
func Read(path string) (Config, error) {
|
||||
|
||||
@@ -2,9 +2,12 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
"github.com/actions/actions-runner-controller/github/actions"
|
||||
"github.com/go-logr/logr"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
@@ -22,145 +25,345 @@ const (
|
||||
labelKeyJobResult = "job_result"
|
||||
)
|
||||
|
||||
const githubScaleSetSubsystem = "gha"
|
||||
|
||||
// labels
|
||||
var (
|
||||
scaleSetLabels = []string{
|
||||
labelKeyRunnerScaleSetName,
|
||||
labelKeyRepository,
|
||||
labelKeyOrganization,
|
||||
labelKeyEnterprise,
|
||||
labelKeyRunnerScaleSetNamespace,
|
||||
}
|
||||
|
||||
jobLabels = []string{
|
||||
labelKeyRepository,
|
||||
labelKeyOrganization,
|
||||
labelKeyEnterprise,
|
||||
labelKeyJobName,
|
||||
labelKeyEventName,
|
||||
}
|
||||
|
||||
completedJobsTotalLabels = append(jobLabels, labelKeyJobResult)
|
||||
jobExecutionDurationLabels = append(jobLabels, labelKeyJobResult)
|
||||
startedJobsTotalLabels = jobLabels
|
||||
jobStartupDurationLabels = jobLabels
|
||||
const (
|
||||
githubScaleSetSubsystem = "gha"
|
||||
githubScaleSetSubsystemPrefix = "gha_"
|
||||
)
|
||||
|
||||
var (
|
||||
assignedJobs = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "assigned_jobs",
|
||||
Help: "Number of jobs assigned to this scale set.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
runningJobs = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "running_jobs",
|
||||
Help: "Number of jobs running (or about to be run).",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
registeredRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "registered_runners",
|
||||
Help: "Number of runners registered by the scale set.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
busyRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "busy_runners",
|
||||
Help: "Number of registered runners running a job.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
minRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "min_runners",
|
||||
Help: "Minimum number of runners.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
maxRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "max_runners",
|
||||
Help: "Maximum number of runners.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
desiredRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "desired_runners",
|
||||
Help: "Number of runners desired by the scale set.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
idleRunners = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "idle_runners",
|
||||
Help: "Number of registered runners not running a job.",
|
||||
},
|
||||
scaleSetLabels,
|
||||
)
|
||||
|
||||
startedJobsTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "started_jobs_total",
|
||||
Help: "Total number of jobs started.",
|
||||
},
|
||||
startedJobsTotalLabels,
|
||||
)
|
||||
|
||||
completedJobsTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "completed_jobs_total",
|
||||
Help: "Total number of jobs completed.",
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
},
|
||||
completedJobsTotalLabels,
|
||||
)
|
||||
|
||||
jobStartupDurationSeconds = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "job_startup_duration_seconds",
|
||||
Help: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
|
||||
Buckets: runtimeBuckets,
|
||||
},
|
||||
jobStartupDurationLabels,
|
||||
)
|
||||
|
||||
jobExecutionDurationSeconds = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: "job_execution_duration_seconds",
|
||||
Help: "Time spent executing workflow jobs by the scale set (in seconds).",
|
||||
Buckets: runtimeBuckets,
|
||||
},
|
||||
jobExecutionDurationLabels,
|
||||
)
|
||||
// Names of all metrics available on the listener
|
||||
const (
|
||||
MetricAssignedJobs = "gha_assigned_jobs"
|
||||
MetricRunningJobs = "gha_running_jobs"
|
||||
MetricRegisteredRunners = "gha_registered_runners"
|
||||
MetricBusyRunners = "gha_busy_runners"
|
||||
MetricMinRunners = "gha_min_runners"
|
||||
MetricMaxRunners = "gha_max_runners"
|
||||
MetricDesiredRunners = "gha_desired_runners"
|
||||
MetricIdleRunners = "gha_idle_runners"
|
||||
MetricStartedJobsTotal = "gha_started_jobs_total"
|
||||
MetricCompletedJobsTotal = "gha_completed_jobs_total"
|
||||
MetricJobStartupDurationSeconds = "gha_job_startup_duration_seconds"
|
||||
MetricJobExecutionDurationSeconds = "gha_job_execution_duration_seconds"
|
||||
)
|
||||
|
||||
var runtimeBuckets []float64 = []float64{
|
||||
// metricsHelpRegistry maps full metric names to their help text, with one map
// per metric kind.
type metricsHelpRegistry struct {
	counters, gauges, histograms map[string]string
}
|
||||
|
||||
var metricsHelp = metricsHelpRegistry{
|
||||
counters: map[string]string{
|
||||
MetricStartedJobsTotal: "Total number of jobs started.",
|
||||
MetricCompletedJobsTotal: "Total number of jobs completed.",
|
||||
},
|
||||
gauges: map[string]string{
|
||||
MetricAssignedJobs: "Number of jobs assigned to this scale set.",
|
||||
MetricRunningJobs: "Number of jobs running (or about to be run).",
|
||||
MetricRegisteredRunners: "Number of runners registered by the scale set.",
|
||||
MetricBusyRunners: "Number of registered runners running a job.",
|
||||
MetricMinRunners: "Minimum number of runners.",
|
||||
MetricMaxRunners: "Maximum number of runners.",
|
||||
MetricDesiredRunners: "Number of runners desired by the scale set.",
|
||||
MetricIdleRunners: "Number of registered runners not running a job.",
|
||||
},
|
||||
histograms: map[string]string{
|
||||
MetricJobStartupDurationSeconds: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
|
||||
MetricJobExecutionDurationSeconds: "Time spent executing workflow jobs by the scale set (in seconds).",
|
||||
},
|
||||
}
|
||||
|
||||
func (e *exporter) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
|
||||
return prometheus.Labels{
|
||||
labelKeyEnterprise: e.scaleSetLabels[labelKeyEnterprise],
|
||||
labelKeyOrganization: jobBase.OwnerName,
|
||||
labelKeyRepository: jobBase.RepositoryName,
|
||||
labelKeyJobName: jobBase.JobDisplayName,
|
||||
labelKeyEventName: jobBase.EventName,
|
||||
}
|
||||
}
|
||||
|
||||
func (e *exporter) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
|
||||
l := e.jobLabels(&msg.JobMessageBase)
|
||||
l[labelKeyJobResult] = msg.Result
|
||||
return l
|
||||
}
|
||||
|
||||
func (e *exporter) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
|
||||
return e.jobLabels(&msg.JobMessageBase)
|
||||
}
|
||||
|
||||
//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore
|
||||
type Publisher interface {
|
||||
PublishStatic(min, max int)
|
||||
PublishStatistics(stats *actions.RunnerScaleSetStatistic)
|
||||
PublishJobStarted(msg *actions.JobStarted)
|
||||
PublishJobCompleted(msg *actions.JobCompleted)
|
||||
PublishDesiredRunners(count int)
|
||||
}
|
||||
|
||||
//go:generate mockery --name ServerPublisher --output ./mocks --outpkg mocks --case underscore
|
||||
type ServerExporter interface {
|
||||
Publisher
|
||||
ListenAndServe(ctx context.Context) error
|
||||
}
|
||||
|
||||
var (
|
||||
_ Publisher = &discard{}
|
||||
_ ServerExporter = &exporter{}
|
||||
)
|
||||
|
||||
// Discard is a Publisher whose methods do nothing.
var Discard Publisher = new(discard)
|
||||
|
||||
type exporter struct {
|
||||
logger logr.Logger
|
||||
scaleSetLabels prometheus.Labels
|
||||
*metrics
|
||||
srv *http.Server
|
||||
}
|
||||
|
||||
type metrics struct {
|
||||
counters map[string]*counterMetric
|
||||
gauges map[string]*gaugeMetric
|
||||
histograms map[string]*histogramMetric
|
||||
}
|
||||
|
||||
type counterMetric struct {
|
||||
counter *prometheus.CounterVec
|
||||
config *v1alpha1.CounterMetric
|
||||
}
|
||||
|
||||
type gaugeMetric struct {
|
||||
gauge *prometheus.GaugeVec
|
||||
config *v1alpha1.GaugeMetric
|
||||
}
|
||||
|
||||
type histogramMetric struct {
|
||||
histogram *prometheus.HistogramVec
|
||||
config *v1alpha1.HistogramMetric
|
||||
}
|
||||
|
||||
type ExporterConfig struct {
|
||||
ScaleSetName string
|
||||
ScaleSetNamespace string
|
||||
Enterprise string
|
||||
Organization string
|
||||
Repository string
|
||||
ServerAddr string
|
||||
ServerEndpoint string
|
||||
Logger logr.Logger
|
||||
Metrics v1alpha1.MetricsConfig
|
||||
}
|
||||
|
||||
func NewExporter(config ExporterConfig) ServerExporter {
|
||||
reg := prometheus.NewRegistry()
|
||||
|
||||
metrics := installMetrics(config.Metrics, reg, config.Logger)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle(
|
||||
config.ServerEndpoint,
|
||||
promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}),
|
||||
)
|
||||
|
||||
return &exporter{
|
||||
logger: config.Logger.WithName("metrics"),
|
||||
scaleSetLabels: prometheus.Labels{
|
||||
labelKeyRunnerScaleSetName: config.ScaleSetName,
|
||||
labelKeyRunnerScaleSetNamespace: config.ScaleSetNamespace,
|
||||
labelKeyEnterprise: config.Enterprise,
|
||||
labelKeyOrganization: config.Organization,
|
||||
labelKeyRepository: config.Repository,
|
||||
},
|
||||
metrics: metrics,
|
||||
srv: &http.Server{
|
||||
Addr: config.ServerAddr,
|
||||
Handler: mux,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var errUnknownMetricName = errors.New("unknown metric name")
|
||||
|
||||
func installMetrics(config v1alpha1.MetricsConfig, reg *prometheus.Registry, logger logr.Logger) *metrics {
|
||||
logger.Info(
|
||||
"Registering metrics",
|
||||
"gauges",
|
||||
config.Gauges,
|
||||
"counters",
|
||||
config.Counters,
|
||||
"histograms",
|
||||
config.Histograms,
|
||||
)
|
||||
metrics := &metrics{
|
||||
counters: make(map[string]*counterMetric, len(config.Counters)),
|
||||
gauges: make(map[string]*gaugeMetric, len(config.Gauges)),
|
||||
histograms: make(map[string]*histogramMetric, len(config.Histograms)),
|
||||
}
|
||||
for name, cfg := range config.Gauges {
|
||||
help, ok := metricsHelp.gauges[name]
|
||||
if !ok {
|
||||
logger.Error(errUnknownMetricName, "name", name, "kind", "gauge")
|
||||
continue
|
||||
}
|
||||
|
||||
g := prometheus.V2.NewGaugeVec(prometheus.GaugeVecOpts{
|
||||
GaugeOpts: prometheus.GaugeOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
|
||||
Help: help,
|
||||
},
|
||||
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
|
||||
})
|
||||
reg.MustRegister(g)
|
||||
metrics.gauges[name] = &gaugeMetric{
|
||||
gauge: g,
|
||||
config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
for name, cfg := range config.Counters {
|
||||
help, ok := metricsHelp.counters[name]
|
||||
if !ok {
|
||||
logger.Error(errUnknownMetricName, "name", name, "kind", "counter")
|
||||
continue
|
||||
}
|
||||
c := prometheus.V2.NewCounterVec(prometheus.CounterVecOpts{
|
||||
CounterOpts: prometheus.CounterOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
|
||||
Help: help,
|
||||
},
|
||||
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
|
||||
})
|
||||
reg.MustRegister(c)
|
||||
metrics.counters[name] = &counterMetric{
|
||||
counter: c,
|
||||
config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
for name, cfg := range config.Histograms {
|
||||
help, ok := metricsHelp.histograms[name]
|
||||
if !ok {
|
||||
logger.Error(errUnknownMetricName, "name", name, "kind", "histogram")
|
||||
continue
|
||||
}
|
||||
|
||||
buckets := defaultRuntimeBuckets
|
||||
if len(cfg.Buckets) > 0 {
|
||||
buckets = cfg.Buckets
|
||||
}
|
||||
h := prometheus.V2.NewHistogramVec(prometheus.HistogramVecOpts{
|
||||
HistogramOpts: prometheus.HistogramOpts{
|
||||
Subsystem: githubScaleSetSubsystem,
|
||||
Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
|
||||
Help: help,
|
||||
Buckets: buckets,
|
||||
},
|
||||
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
|
||||
})
|
||||
cfg.Buckets = buckets
|
||||
reg.MustRegister(h)
|
||||
metrics.histograms[name] = &histogramMetric{
|
||||
histogram: h,
|
||||
config: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
return metrics
|
||||
}
|
||||
|
||||
func (e *exporter) ListenAndServe(ctx context.Context) error {
|
||||
e.logger.Info("starting metrics server", "addr", e.srv.Addr)
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
e.logger.Info("stopping metrics server", "err", ctx.Err())
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
e.srv.Shutdown(ctx)
|
||||
}()
|
||||
return e.srv.ListenAndServe()
|
||||
}
|
||||
|
||||
func (e *exporter) setGauge(name string, allLabels prometheus.Labels, val float64) {
|
||||
m, ok := e.metrics.gauges[name]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
labels := make(prometheus.Labels, len(m.config.Labels))
|
||||
for _, label := range m.config.Labels {
|
||||
labels[label] = allLabels[label]
|
||||
}
|
||||
m.gauge.With(labels).Set(val)
|
||||
}
|
||||
|
||||
func (e *exporter) incCounter(name string, allLabels prometheus.Labels) {
|
||||
m, ok := e.metrics.counters[name]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
labels := make(prometheus.Labels, len(m.config.Labels))
|
||||
for _, label := range m.config.Labels {
|
||||
labels[label] = allLabels[label]
|
||||
}
|
||||
m.counter.With(labels).Inc()
|
||||
}
|
||||
|
||||
func (e *exporter) observeHistogram(name string, allLabels prometheus.Labels, val float64) {
|
||||
m, ok := e.metrics.histograms[name]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
labels := make(prometheus.Labels, len(m.config.Labels))
|
||||
for _, label := range m.config.Labels {
|
||||
labels[label] = allLabels[label]
|
||||
}
|
||||
m.histogram.With(labels).Observe(val)
|
||||
}
|
||||
|
||||
func (e *exporter) PublishStatic(min, max int) {
|
||||
e.setGauge(MetricMaxRunners, e.scaleSetLabels, float64(max))
|
||||
e.setGauge(MetricMinRunners, e.scaleSetLabels, float64(min))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
|
||||
e.setGauge(MetricAssignedJobs, e.scaleSetLabels, float64(stats.TotalAssignedJobs))
|
||||
e.setGauge(MetricRunningJobs, e.scaleSetLabels, float64(stats.TotalRunningJobs))
|
||||
e.setGauge(MetricRegisteredRunners, e.scaleSetLabels, float64(stats.TotalRegisteredRunners))
|
||||
e.setGauge(MetricBusyRunners, e.scaleSetLabels, float64(float64(stats.TotalRegisteredRunners)))
|
||||
e.setGauge(MetricIdleRunners, e.scaleSetLabels, float64(stats.TotalIdleRunners))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishJobStarted(msg *actions.JobStarted) {
|
||||
l := e.startedJobLabels(msg)
|
||||
e.incCounter(MetricStartedJobsTotal, l)
|
||||
|
||||
startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix()
|
||||
e.observeHistogram(MetricJobStartupDurationSeconds, l, float64(startupDuration))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) {
|
||||
l := e.completedJobLabels(msg)
|
||||
e.incCounter(MetricCompletedJobsTotal, l)
|
||||
|
||||
executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix()
|
||||
e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(executionDuration))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishDesiredRunners(count int) {
|
||||
e.setGauge(MetricDesiredRunners, e.scaleSetLabels, float64(count))
|
||||
}
|
||||
|
||||
type discard struct{}
|
||||
|
||||
func (*discard) PublishStatic(int, int) {}
|
||||
func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {}
|
||||
func (*discard) PublishJobStarted(*actions.JobStarted) {}
|
||||
func (*discard) PublishJobCompleted(*actions.JobCompleted) {}
|
||||
func (*discard) PublishDesiredRunners(int) {}
|
||||
|
||||
var defaultRuntimeBuckets []float64 = []float64{
|
||||
0.01,
|
||||
0.05,
|
||||
0.1,
|
||||
@@ -207,176 +410,3 @@ var runtimeBuckets []float64 = []float64{
|
||||
3000,
|
||||
3600,
|
||||
}
|
||||
|
||||
type baseLabels struct {
|
||||
scaleSetName string
|
||||
scaleSetNamespace string
|
||||
enterprise string
|
||||
organization string
|
||||
repository string
|
||||
}
|
||||
|
||||
func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
|
||||
return prometheus.Labels{
|
||||
labelKeyEnterprise: b.enterprise,
|
||||
labelKeyOrganization: jobBase.OwnerName,
|
||||
labelKeyRepository: jobBase.RepositoryName,
|
||||
labelKeyJobName: jobBase.JobDisplayName,
|
||||
labelKeyEventName: jobBase.EventName,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *baseLabels) scaleSetLabels() prometheus.Labels {
|
||||
return prometheus.Labels{
|
||||
labelKeyRunnerScaleSetName: b.scaleSetName,
|
||||
labelKeyRunnerScaleSetNamespace: b.scaleSetNamespace,
|
||||
labelKeyEnterprise: b.enterprise,
|
||||
labelKeyOrganization: b.organization,
|
||||
labelKeyRepository: b.repository,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *baseLabels) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
|
||||
l := b.jobLabels(&msg.JobMessageBase)
|
||||
l[labelKeyJobResult] = msg.Result
|
||||
return l
|
||||
}
|
||||
|
||||
func (b *baseLabels) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
|
||||
l := b.jobLabels(&msg.JobMessageBase)
|
||||
return l
|
||||
}
|
||||
|
||||
//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore
|
||||
type Publisher interface {
|
||||
PublishStatic(min, max int)
|
||||
PublishStatistics(stats *actions.RunnerScaleSetStatistic)
|
||||
PublishJobStarted(msg *actions.JobStarted)
|
||||
PublishJobCompleted(msg *actions.JobCompleted)
|
||||
PublishDesiredRunners(count int)
|
||||
}
|
||||
|
||||
//go:generate mockery --name ServerPublisher --output ./mocks --outpkg mocks --case underscore
|
||||
type ServerPublisher interface {
|
||||
Publisher
|
||||
ListenAndServe(ctx context.Context) error
|
||||
}
|
||||
|
||||
var (
|
||||
_ Publisher = &discard{}
|
||||
_ ServerPublisher = &exporter{}
|
||||
)
|
||||
|
||||
var Discard Publisher = &discard{}
|
||||
|
||||
type exporter struct {
|
||||
logger logr.Logger
|
||||
baseLabels
|
||||
srv *http.Server
|
||||
}
|
||||
|
||||
type ExporterConfig struct {
|
||||
ScaleSetName string
|
||||
ScaleSetNamespace string
|
||||
Enterprise string
|
||||
Organization string
|
||||
Repository string
|
||||
ServerAddr string
|
||||
ServerEndpoint string
|
||||
Logger logr.Logger
|
||||
}
|
||||
|
||||
func NewExporter(config ExporterConfig) ServerPublisher {
|
||||
reg := prometheus.NewRegistry()
|
||||
reg.MustRegister(
|
||||
assignedJobs,
|
||||
runningJobs,
|
||||
registeredRunners,
|
||||
busyRunners,
|
||||
minRunners,
|
||||
maxRunners,
|
||||
desiredRunners,
|
||||
idleRunners,
|
||||
startedJobsTotal,
|
||||
completedJobsTotal,
|
||||
jobStartupDurationSeconds,
|
||||
jobExecutionDurationSeconds,
|
||||
)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle(
|
||||
config.ServerEndpoint,
|
||||
promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}),
|
||||
)
|
||||
|
||||
return &exporter{
|
||||
logger: config.Logger.WithName("metrics"),
|
||||
baseLabels: baseLabels{
|
||||
scaleSetName: config.ScaleSetName,
|
||||
scaleSetNamespace: config.ScaleSetNamespace,
|
||||
enterprise: config.Enterprise,
|
||||
organization: config.Organization,
|
||||
repository: config.Repository,
|
||||
},
|
||||
srv: &http.Server{
|
||||
Addr: config.ServerAddr,
|
||||
Handler: mux,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (e *exporter) ListenAndServe(ctx context.Context) error {
|
||||
e.logger.Info("starting metrics server", "addr", e.srv.Addr)
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
e.logger.Info("stopping metrics server", "err", ctx.Err())
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
e.srv.Shutdown(ctx)
|
||||
}()
|
||||
return e.srv.ListenAndServe()
|
||||
}
|
||||
|
||||
func (m *exporter) PublishStatic(min, max int) {
|
||||
l := m.scaleSetLabels()
|
||||
maxRunners.With(l).Set(float64(max))
|
||||
minRunners.With(l).Set(float64(min))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
|
||||
l := e.scaleSetLabels()
|
||||
|
||||
assignedJobs.With(l).Set(float64(stats.TotalAssignedJobs))
|
||||
runningJobs.With(l).Set(float64(stats.TotalRunningJobs))
|
||||
registeredRunners.With(l).Set(float64(stats.TotalRegisteredRunners))
|
||||
busyRunners.With(l).Set(float64(stats.TotalBusyRunners))
|
||||
idleRunners.With(l).Set(float64(stats.TotalIdleRunners))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishJobStarted(msg *actions.JobStarted) {
|
||||
l := e.startedJobLabels(msg)
|
||||
startedJobsTotal.With(l).Inc()
|
||||
|
||||
startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix()
|
||||
jobStartupDurationSeconds.With(l).Observe(float64(startupDuration))
|
||||
}
|
||||
|
||||
func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) {
|
||||
l := e.completedJobLabels(msg)
|
||||
completedJobsTotal.With(l).Inc()
|
||||
|
||||
executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix()
|
||||
jobExecutionDurationSeconds.With(l).Observe(float64(executionDuration))
|
||||
}
|
||||
|
||||
func (m *exporter) PublishDesiredRunners(count int) {
|
||||
desiredRunners.With(m.scaleSetLabels()).Set(float64(count))
|
||||
}
|
||||
|
||||
type discard struct{}
|
||||
|
||||
func (*discard) PublishStatic(int, int) {}
|
||||
func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {}
|
||||
func (*discard) PublishJobStarted(*actions.JobStarted) {}
|
||||
func (*discard) PublishJobCompleted(*actions.JobCompleted) {}
|
||||
func (*discard) PublishDesiredRunners(int) {}
|
||||
|
||||
88
cmd/ghalistener/metrics/metrics_test.go
Normal file
88
cmd/ghalistener/metrics/metrics_test.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||
"github.com/go-logr/logr"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestInstallMetrics(t *testing.T) {
|
||||
metricsConfig := v1alpha1.MetricsConfig{
|
||||
Counters: map[string]*v1alpha1.CounterMetric{
|
||||
// unknown metric shouldn't be registered
|
||||
"gha_unknown": {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// gauge metric shouldn't be registered from this section
|
||||
MetricAssignedJobs: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// histogram metric shouldn't be registered from this section
|
||||
MetricJobStartupDurationSeconds: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// counter metric should be registered
|
||||
MetricStartedJobsTotal: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
},
|
||||
Gauges: map[string]*v1alpha1.GaugeMetric{
|
||||
// unknown metric shouldn't be registered
|
||||
"gha_unknown": {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// counter metric shouldn't be registered from this section
|
||||
MetricStartedJobsTotal: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// histogram metric shouldn't be registered from this section
|
||||
MetricJobStartupDurationSeconds: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// gauge metric should be registered
|
||||
MetricAssignedJobs: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
},
|
||||
Histograms: map[string]*v1alpha1.HistogramMetric{
|
||||
// unknown metric shouldn't be registered
|
||||
"gha_unknown": {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// counter metric shouldn't be registered from this section
|
||||
MetricStartedJobsTotal: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// gauge metric shouldn't be registered from this section
|
||||
MetricAssignedJobs: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
// histogram metric should be registered
|
||||
MetricJobExecutionDurationSeconds: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
Buckets: []float64{0.1, 1},
|
||||
},
|
||||
// histogram metric should be registered with default runtime buckets
|
||||
MetricJobStartupDurationSeconds: {
|
||||
Labels: []string{labelKeyRepository},
|
||||
},
|
||||
},
|
||||
}
|
||||
reg := prometheus.NewRegistry()
|
||||
|
||||
got := installMetrics(metricsConfig, reg, logr.Discard())
|
||||
assert.Len(t, got.counters, 1)
|
||||
assert.Len(t, got.gauges, 1)
|
||||
assert.Len(t, got.histograms, 2)
|
||||
|
||||
assert.Equal(t, got.counters[MetricStartedJobsTotal].config, metricsConfig.Counters[MetricStartedJobsTotal])
|
||||
assert.Equal(t, got.gauges[MetricAssignedJobs].config, metricsConfig.Gauges[MetricAssignedJobs])
|
||||
assert.Equal(t, got.histograms[MetricJobExecutionDurationSeconds].config, metricsConfig.Histograms[MetricJobExecutionDurationSeconds])
|
||||
|
||||
duration := got.histograms[MetricJobStartupDurationSeconds]
|
||||
assert.Equal(t, duration.config.Labels, metricsConfig.Histograms[MetricJobStartupDurationSeconds].Labels)
|
||||
assert.Equal(t, duration.config.Buckets, defaultRuntimeBuckets)
|
||||
}
|
||||
Reference in New Issue
Block a user