mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-10 11:41:27 +00:00
Co-authored-by: Tingluo Huang <tingluohuang@github.com> Co-authored-by: Bassem Dghaidi <568794+Link-@users.noreply.github.com>
331 lines
8.5 KiB
Go
331 lines
8.5 KiB
Go
package main
|
|
|
|
import (
|
|
"strconv"
|
|
|
|
"github.com/actions/actions-runner-controller/github/actions"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// Prometheus label names used by the metrics declared in this file.
const (
	// Labels carried by the scale-set level metrics.
	labelKeyRunnerScaleSetName      = "name"
	labelKeyRunnerScaleSetNamespace = "namespace"
	labelKeyEnterprise              = "enterprise"
	labelKeyOrganization            = "organization"
	labelKeyRepository              = "repository"

	// Labels describing an individual job.
	labelKeyJobName        = "job_name"
	labelKeyJobWorkflowRef = "job_workflow_ref"
	labelKeyEventName      = "event_name"
	labelKeyJobResult      = "job_result"

	// Labels identifying a runner.
	labelKeyRunnerID   = "runner_id"
	labelKeyRunnerName = "runner_name"
)
|
|
|
|
// githubScaleSetSubsystem is the Prometheus subsystem given to every metric
// declared in this file.
const (
	githubScaleSetSubsystem = "gha"
)
|
|
|
|
// labels
|
|
var (
|
|
scaleSetLabels = []string{
|
|
labelKeyRunnerScaleSetName,
|
|
labelKeyRepository,
|
|
labelKeyOrganization,
|
|
labelKeyEnterprise,
|
|
labelKeyRunnerScaleSetNamespace,
|
|
}
|
|
|
|
jobLabels = []string{
|
|
labelKeyRepository,
|
|
labelKeyOrganization,
|
|
labelKeyEnterprise,
|
|
labelKeyJobName,
|
|
labelKeyJobWorkflowRef,
|
|
labelKeyEventName,
|
|
}
|
|
|
|
completedJobsTotalLabels = append(jobLabels, labelKeyJobResult, labelKeyRunnerID, labelKeyRunnerName)
|
|
jobExecutionDurationLabels = append(jobLabels, labelKeyJobResult, labelKeyRunnerID, labelKeyRunnerName)
|
|
startedJobsTotalLabels = append(jobLabels, labelKeyRunnerID, labelKeyRunnerName)
|
|
jobStartupDurationLabels = append(jobLabels, labelKeyRunnerID, labelKeyRunnerName)
|
|
)
|
|
|
|
// metrics
|
|
var (
|
|
// availableJobs = prometheus.NewGaugeVec(
|
|
// prometheus.GaugeOpts{
|
|
// Subsystem: githubScaleSetSubsystem,
|
|
// Name: "available_jobs",
|
|
// Help: "Number of jobs with `runs-on` matching the runner scale set name. Jobs are not yet assigned to the runner scale set.",
|
|
// },
|
|
// scaleSetLabels,
|
|
// )
|
|
//
|
|
// acquiredJobs = prometheus.NewGaugeVec(
|
|
// prometheus.GaugeOpts{
|
|
// Subsystem: githubScaleSetSubsystem,
|
|
// Name: "acquired_jobs",
|
|
// Help: "Number of jobs acquired by the scale set.",
|
|
// },
|
|
// scaleSetLabels,
|
|
// )
|
|
|
|
assignedJobs = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "assigned_jobs",
|
|
Help: "Number of jobs assigned to this scale set.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
runningJobs = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "running_jobs",
|
|
Help: "Number of jobs running (or about to be run).",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
registeredRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "registered_runners",
|
|
Help: "Number of runners registered by the scale set.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
busyRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "busy_runners",
|
|
Help: "Number of registered runners running a job.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
minRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "min_runners",
|
|
Help: "Minimum number of runners.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
maxRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "max_runners",
|
|
Help: "Maximum number of runners.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
desiredRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "desired_runners",
|
|
Help: "Number of runners desired by the scale set.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
idleRunners = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "idle_runners",
|
|
Help: "Number of registered runners not running a job.",
|
|
},
|
|
scaleSetLabels,
|
|
)
|
|
|
|
startedJobsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "started_jobs_total",
|
|
Help: "Total number of jobs started.",
|
|
},
|
|
startedJobsTotalLabels,
|
|
)
|
|
|
|
completedJobsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "completed_jobs_total",
|
|
Help: "Total number of jobs completed.",
|
|
Subsystem: githubScaleSetSubsystem,
|
|
},
|
|
completedJobsTotalLabels,
|
|
)
|
|
|
|
// jobQueueDurationSeconds = prometheus.NewHistogramVec(
|
|
// prometheus.HistogramOpts{
|
|
// Subsystem: githubScaleSetSubsystem,
|
|
// Name: "job_queue_duration_seconds",
|
|
// Help: "Time spent waiting for workflow jobs to get assigned to the scale set after queueing (in seconds).",
|
|
// Buckets: runtimeBuckets,
|
|
// },
|
|
// jobLabels,
|
|
// )
|
|
|
|
jobStartupDurationSeconds = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "job_startup_duration_seconds",
|
|
Help: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
|
|
Buckets: runtimeBuckets,
|
|
},
|
|
jobStartupDurationLabels,
|
|
)
|
|
|
|
jobExecutionDurationSeconds = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Subsystem: githubScaleSetSubsystem,
|
|
Name: "job_execution_duration_seconds",
|
|
Help: "Time spent executing workflow jobs by the scale set (in seconds).",
|
|
Buckets: runtimeBuckets,
|
|
},
|
|
jobExecutionDurationLabels,
|
|
)
|
|
)
|
|
|
|
// runtimeBuckets lists the histogram bucket upper bounds, in seconds, used by
// the job duration histograms declared in this file. The values are strictly
// increasing and get coarser as durations grow, ending at one hour.
var runtimeBuckets = []float64{
	// Sub-second buckets.
	0.01, 0.05, 0.1, 0.5,
	// One-second steps up to 10 seconds.
	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
	// Coarser steps up to 30 seconds.
	12, 15, 18, 20, 25, 30,
	// Ten-second steps up to 2 minutes.
	40, 50, 60, 70, 80, 90, 100, 110, 120,
	// Thirty-second steps up to 4 minutes.
	150, 180, 210, 240,
	// One-minute steps up to 10 minutes.
	300, 360, 420, 480, 540, 600,
	// Coarse steps up to one hour.
	900, 1200, 1800, 2400, 3000, 3600,
}
|
|
|
|
type metricsExporter struct {
|
|
// Initialized during creation.
|
|
baseLabels
|
|
}
|
|
|
|
// baseLabels holds the label values that stay the same across the metrics
// published by one exporter.
type baseLabels struct {
	// Values reported under the "name" and "namespace" labels.
	scaleSetName, scaleSetNamespace string

	// Values reported under the "enterprise", "organization" and "repository"
	// labels.
	enterprise, organization, repository string
}
|
|
|
|
func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
|
|
return prometheus.Labels{
|
|
labelKeyEnterprise: b.enterprise,
|
|
labelKeyOrganization: b.organization,
|
|
labelKeyRepository: b.repository,
|
|
labelKeyJobName: jobBase.JobDisplayName,
|
|
labelKeyJobWorkflowRef: jobBase.JobWorkflowRef,
|
|
labelKeyEventName: jobBase.EventName,
|
|
}
|
|
}
|
|
|
|
func (b *baseLabels) scaleSetLabels() prometheus.Labels {
|
|
return prometheus.Labels{
|
|
labelKeyRunnerScaleSetName: b.scaleSetName,
|
|
labelKeyRunnerScaleSetNamespace: b.scaleSetNamespace,
|
|
labelKeyEnterprise: b.enterprise,
|
|
labelKeyOrganization: b.organization,
|
|
labelKeyRepository: b.repository,
|
|
}
|
|
}
|
|
|
|
func (b *baseLabels) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
|
|
l := b.jobLabels(&msg.JobMessageBase)
|
|
l[labelKeyRunnerID] = strconv.Itoa(msg.RunnerId)
|
|
l[labelKeyJobResult] = msg.Result
|
|
l[labelKeyRunnerName] = msg.RunnerName
|
|
return l
|
|
}
|
|
|
|
func (b *baseLabels) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
|
|
l := b.jobLabels(&msg.JobMessageBase)
|
|
l[labelKeyRunnerID] = strconv.Itoa(msg.RunnerId)
|
|
l[labelKeyRunnerName] = msg.RunnerName
|
|
return l
|
|
}
|
|
|
|
func (m *metricsExporter) withBaseLabels(base baseLabels) {
|
|
m.baseLabels = base
|
|
}
|
|
|
|
func (m *metricsExporter) publishStatistics(stats *actions.RunnerScaleSetStatistic) {
|
|
l := m.scaleSetLabels()
|
|
|
|
// availableJobs.With(l).Set(float64(stats.TotalAvailableJobs))
|
|
// acquiredJobs.With(l).Set(float64(stats.TotalAcquiredJobs))
|
|
assignedJobs.With(l).Set(float64(stats.TotalAssignedJobs))
|
|
runningJobs.With(l).Set(float64(stats.TotalRunningJobs))
|
|
registeredRunners.With(l).Set(float64(stats.TotalRegisteredRunners))
|
|
busyRunners.With(l).Set(float64(stats.TotalBusyRunners))
|
|
idleRunners.With(l).Set(float64(stats.TotalIdleRunners))
|
|
}
|
|
|
|
func (m *metricsExporter) publishJobStarted(msg *actions.JobStarted) {
|
|
l := m.startedJobLabels(msg)
|
|
startedJobsTotal.With(l).Inc()
|
|
|
|
startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix()
|
|
jobStartupDurationSeconds.With(l).Observe(float64(startupDuration))
|
|
}
|
|
|
|
// func (m *metricsExporter) publishJobAssigned(msg *actions.JobAssigned) {
|
|
// l := m.jobLabels(&msg.JobMessageBase)
|
|
// queueDuration := msg.JobMessageBase.ScaleSetAssignTime.Unix() - msg.JobMessageBase.QueueTime.Unix()
|
|
// jobQueueDurationSeconds.With(l).Observe(float64(queueDuration))
|
|
// }
|
|
|
|
func (m *metricsExporter) publishJobCompleted(msg *actions.JobCompleted) {
|
|
l := m.completedJobLabels(msg)
|
|
completedJobsTotal.With(l).Inc()
|
|
|
|
executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix()
|
|
jobExecutionDurationSeconds.With(l).Observe(float64(executionDuration))
|
|
}
|
|
|
|
func (m *metricsExporter) publishDesiredRunners(count int) {
|
|
desiredRunners.With(m.scaleSetLabels()).Set(float64(count))
|
|
}
|