Compare commits

..

12 Commits

Author SHA1 Message Date
Kirill Bilchenko
f1d7c52253 bump appVersion to latest available app (#2840) 2023-08-30 15:01:31 +09:00
Jonathan Wiemers
76d622b86b feature: allow custom envornment variables in metricsservice (#2839) 2023-08-30 15:01:06 +09:00
Nathan Heaps
0b24b0d60b Add docs for setting the RUNNER_GRACEFUL_STOP_TIMEOUT env var on docker container (#2843) 2023-08-30 12:30:18 +09:00
Bassem Dghaidi
5e23c598a8 Move top level metrics property up (#2841) 2023-08-29 03:58:08 -04:00
Nikola Jokic
3652932780 Fix canary VERSION parameter (#2842) 2023-08-28 14:46:53 +02:00
Stefan Andres
94065d2fc5 [helm actions-runner-controller] Use namespaceSelector.matchExpression instead of matchLabels (#2830) 2023-08-28 14:24:20 +09:00
jb-2020
b1cc4da5dc Switch git-lfs source to packagecloud (#2838) 2023-08-28 14:23:57 +09:00
Lukas Hauser
8b7bfa5ffb Fix - Actually Enable Sets in addition to Slices in env (#2828) 2023-08-28 13:48:29 +09:00
Nikola Jokic
52fc819339 Fix parsing AcquireJob MessageQueueTokenExpiredError (#2837) 2023-08-25 20:35:01 +02:00
Bassem Dghaidi
215b245881 Upgrade e2e tests to latest version (0.5.0) (#2826) 2023-08-21 17:09:16 +02:00
Bassem Dghaidi
a3df23b07c Add grafana dashboard sample (#2825) 2023-08-21 16:31:55 +02:00
Bassem Dghaidi
f5c69654e7 Revert back the helm chart renaming hotfix (#2823) 2023-08-21 15:44:20 +02:00
20 changed files with 1383 additions and 38 deletions

View File

@@ -16,7 +16,7 @@ env:
TARGET_ORG: actions-runner-controller
TARGET_REPO: arc_e2e_test_dummy
IMAGE_NAME: "arc-test-image"
IMAGE_VERSION: "0.4.0"
IMAGE_VERSION: "0.5.0"
concurrency:
# This will make sure we only apply the concurrency limits on pull requests

View File

@@ -151,7 +151,7 @@ jobs:
GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG=$(cat charts/gha-runner-scale-set-controller/Chart.yaml | grep version: | cut -d " " -f 2)
echo "GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG=${GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG}" >> $GITHUB_ENV
helm package charts/gha-runner-scale-set-controller/ --version="${GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG}"
helm push gha-rs-controller-"${GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG}".tgz oci://ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/actions-runner-controller-charts
helm push gha-runner-scale-set-controller-"${GHA_RUNNER_SCALE_SET_CONTROLLER_CHART_VERSION_TAG}".tgz oci://ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/actions-runner-controller-charts
- name: Job summary
run: |
@@ -200,7 +200,7 @@ jobs:
GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG=$(cat charts/gha-runner-scale-set/Chart.yaml | grep version: | cut -d " " -f 2)
echo "GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG=${GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG}" >> $GITHUB_ENV
helm package charts/gha-runner-scale-set/ --version="${GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG}"
helm push gha-rs-"${GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG}".tgz oci://ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/actions-runner-controller-charts
helm push gha-runner-scale-set-"${GHA_RUNNER_SCALE_SET_CHART_VERSION_TAG}".tgz oci://ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/actions-runner-controller-charts
- name: Job summary
run: |

View File

@@ -91,7 +91,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
with:
@@ -101,14 +101,14 @@ jobs:
# Normalization is needed because upper case characters are not allowed in the repository name
# and the short sha is needed for image tagging
- name: Resolve parameters
- name: Resolve parameters
id: resolve_parameters
run: |
echo "INFO: Resolving short sha"
echo "short_sha=$(git rev-parse --short ${{ github.ref }})" >> $GITHUB_OUTPUT
echo "INFO: Normalizing repository name (lowercase)"
echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
@@ -124,7 +124,7 @@ jobs:
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
build-args: VERSION=canary-"${{ github.ref }}"
build-args: VERSION=canary-${{ steps.resolve_parameters.outputs.short_sha }}
push: ${{ env.PUSH_TO_REGISTRIES }}
tags: |
ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/gha-runner-scale-set-controller:canary

View File

@@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.23.3
version: 0.23.5
# Used as the default manager tag value when no tag property is provided in the values.yaml
appVersion: 0.27.4
appVersion: 0.27.5
home: https://github.com/actions/actions-runner-controller

View File

@@ -111,10 +111,14 @@ spec:
name: {{ include "actions-runner-controller.secretName" . }}
optional: true
{{- end }}
{{- if kindIs "slice" .Values.actionsMetricsServer.env }}
{{- toYaml .Values.actionsMetricsServer.env | nindent 8 }}
{{- else }}
{{- range $key, $val := .Values.actionsMetricsServer.env }}
- name: {{ $key }}
value: {{ $val | quote }}
{{- end }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (cat "v" .Chart.AppVersion | replace " " "") }}"
name: actions-metrics-server
imagePullPolicy: {{ .Values.image.pullPolicy }}

View File

@@ -19,7 +19,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -50,7 +50,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -81,7 +81,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -112,7 +112,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -156,7 +156,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -187,7 +187,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}
@@ -218,7 +218,7 @@ webhooks:
{{- if .Values.scope.singleNamespace }}
namespaceSelector:
matchLabels:
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
kubernetes.io/metadata.name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
{{- end }}
clientConfig:
{{- if .Values.admissionWebHooks.caBundle }}

View File

@@ -151,8 +151,7 @@ podDisruptionBudget:
# PriorityClass: system-cluster-critical
priorityClassName: ""
env:
{}
# env:
# specify additional environment variables for the controller pod.
# It's possible to specify either key vale pairs e.g.:
# http_proxy: "proxy.com:8080"
@@ -303,7 +302,7 @@ githubWebhookServer:
# key: GITHUB_WEBHOOK_SECRET_TOKEN
# name: prod-gha-controller-webhook-token
# optional: true
env: {}
# env:
actionsMetrics:
serviceAnnotations: {}
@@ -322,6 +321,19 @@ actionsMetrics:
image:
repository: quay.io/brancz/kube-rbac-proxy
tag: v0.13.1
# specify additional environment variables for the webhook server pod.
# It's possible to specify either key vale pairs e.g.:
# my_env_var: "some value"
# my_other_env_var: "other value"
# or a list of complete environment variable definitions e.g.:
# - name: GITHUB_WEBHOOK_SECRET_TOKEN
# valueFrom:
# secretKeyRef:
# key: GITHUB_WEBHOOK_SECRET_TOKEN
# name: prod-gha-controller-webhook-token
# optional: true
# env:
actionsMetricsServer:
enabled: false

View File

@@ -75,6 +75,17 @@ affinity: {}
# PriorityClass: system-cluster-critical
priorityClassName: ""
## If `metrics:` object is not provided, or commented out, the following flags
## will be applied the controller-manager and listener pods with empty values:
## `--metrics-addr`, `--listener-metrics-addr`, `--listener-metrics-endpoint`.
## This will disable metrics.
##
## To enable metrics, uncomment the following lines.
# metrics:
# controllerManagerAddr: ":8080"
# listenerAddr: ":8080"
# listenerEndpoint: "/metrics"
flags:
## Log level can be set here with one of the following values: "debug", "info", "warn", "error".
## Defaults to "debug".
@@ -102,14 +113,3 @@ flags:
## This can lead to a longer time to apply the change but it will ensure
## that you don't have any overprovisioning of runners.
updateStrategy: "immediate"
## If `metrics:` object is not provided, or commented out, the following flags
## will be applied the controller-manager and listener pods with empty values:
## `--metrics-addr`, `--listener-metrics-addr`, `--listener-metrics-endpoint`.
## This will disable metrics.
##
## To enable metrics, uncomment the following lines.
# metrics:
# controllerManagerAddr: ":8080"
# listenerAddr: ":8080"
# listenerEndpoint: "/metrics"

View File

@@ -554,7 +554,7 @@ This can be problematic in two scenarios:
> RunnerDeployment is not affected by the Scenario 1 as RunnerDeployment-managed runners are already tolerable to unlimitedly long in-progress running job while being replaced, as it's graceful termination process is handled outside of the entrypoint and the Kubernetes' pod termination process.
To make it more reliable, please set `spec.template.spec.terminationGracePeriodSeconds` field and the `RUNNER_GRACEFUL_STOP_TIMEOUT` environment variable appropriately.
To make it more reliable, please set `spec.template.spec.terminationGracePeriodSeconds` field and the `RUNNER_GRACEFUL_STOP_TIMEOUT` environment variable appropriately. **NOTE:** if you are using the default configuration of running DinD as a sidecar, you'll need to set this environment variable in both `spec.template.spec.env` as well as `spec.template.spec.dockerEnv` for RunnerDeployment objects, otherwise the `docker` container will recieve the same termination signal and exit while the remainder of the build runs.
If you want the pod to terminate in approximately 110 seconds at the latest since the termination request, try `terminationGracePeriodSeconds` of `110` and `RUNNER_GRACEFUL_STOP_TIMEOUT` of like `90`.

View File

@@ -0,0 +1,15 @@
# Visualizing Autoscaling Runner Scale Set metrics with Grafana
With metrics introduced in [gha-runner-scale-set-0.5.0](https://github.com/actions/actions-runner-controller/releases/tag/gha-runner-scale-set-0.5.0), you can now visualize the autoscaling behavior of your runner scale set with your tool of choice. This sample shows how to visualize the metrics with [Grafana](https://grafana.com/).
## Demo
![Grafana dashboard example](grafana-sample.png)
## Setup
We do not intend to provide a supported ARC dashboard. This is simply a reference and a demonstration for how you could leverage the metrics emitted by the controller-manager and listeners to visualize the autoscaling behavior of your runner scale set. We offer no promises of future upgrades to this sample.
1. Make sure to have [Grafana](https://grafana.com/docs/grafana/latest/installation/) and [Prometheus](https://prometheus.io/docs/prometheus/latest/installation/) running in your cluster.
2. Make sure that Prometheus is properly scraping the metrics endpoints of the controller-manager and listeners.
3. Import the [dashboard](ARC-Autoscaling-Runner-Set-Monitoring_1692627561838.json.json) into Grafana.

Binary file not shown.

View File

@@ -634,7 +634,18 @@ func (c *Client) AcquireJobs(ctx context.Context, runnerScaleSetId int, messageQ
}
if resp.StatusCode != http.StatusOK {
return nil, ParseActionsErrorFromResponse(resp)
if resp.StatusCode != http.StatusUnauthorized {
return nil, ParseActionsErrorFromResponse(resp)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
body = trimByteOrderMark(body)
if err != nil {
return nil, err
}
return nil, &MessageQueueTokenExpiredError{msg: string(body)}
}
var acquiredJobs *Int64List

View File

@@ -2,6 +2,7 @@ package actions_test
import (
"context"
"errors"
"net/http"
"strings"
"testing"
@@ -84,6 +85,39 @@ func TestAcquireJobs(t *testing.T) {
assert.NotNil(t, err)
assert.Equalf(t, actualRetry, expectedRetry, "A retry was expected after the first request but got: %v", actualRetry)
})
t.Run("Should return MessageQueueTokenExpiredError when http error is not Unauthorized", func(t *testing.T) {
want := []int64{1}
session := &actions.RunnerScaleSetSession{
RunnerScaleSet: &actions.RunnerScaleSet{Id: 1},
MessageQueueAccessToken: "abc",
}
requestIDs := want
server := newActionsServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "/acquirablejobs") {
w.Write([]byte(`{"count": 1}`))
return
}
if r.Method == http.MethodPost {
http.Error(w, "Session expired", http.StatusUnauthorized)
return
}
}))
client, err := actions.NewClient(server.configURLForOrg("my-org"), auth)
require.NoError(t, err)
_, err = client.GetAcquirableJobs(ctx, 1)
require.NoError(t, err)
got, err := client.AcquireJobs(ctx, session.RunnerScaleSet.Id, session.MessageQueueAccessToken, requestIDs)
require.Error(t, err)
assert.Nil(t, got)
var expectedErr *actions.MessageQueueTokenExpiredError
assert.True(t, errors.As(err, &expectedErr))
})
}
func TestGetAcquirableJobs(t *testing.T) {

View File

@@ -27,7 +27,6 @@ RUN apt-get update -y \
dnsutils \
ftp \
git \
git-lfs \
iproute2 \
iputils-ping \
iptables \
@@ -56,6 +55,10 @@ RUN apt-get update -y \
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
# Runner user
RUN adduser --disabled-password --gecos "" --uid $RUNNER_UID runner

View File

@@ -23,7 +23,6 @@ RUN apt-get update -y \
curl \
ca-certificates \
git \
git-lfs \
iproute2 \
iptables \
jq \
@@ -33,6 +32,10 @@ RUN apt-get update -y \
zip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
# Runner user
RUN adduser --disabled-password --gecos "" --uid $RUNNER_USER_UID runner

View File

@@ -25,7 +25,6 @@ RUN apt-get update -y \
dnsutils \
ftp \
git \
git-lfs \
iproute2 \
iputils-ping \
iptables \
@@ -53,6 +52,10 @@ RUN apt-get update -y \
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
# Runner user
RUN adduser --disabled-password --gecos "" --uid $RUNNER_UID runner \
&& groupadd docker --gid $DOCKER_GID \

View File

@@ -20,7 +20,6 @@ RUN apt-get update -y \
curl \
ca-certificates \
git \
git-lfs \
iptables \
jq \
software-properties-common \
@@ -29,6 +28,10 @@ RUN apt-get update -y \
zip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
# Runner user
RUN adduser --disabled-password --gecos "" --uid $RUNNER_USER_UID runner \
&& groupadd docker --gid $DOCKER_GROUP_GID \

View File

@@ -25,7 +25,6 @@ RUN apt-get update -y \
dnsutils \
ftp \
git \
git-lfs \
iproute2 \
iputils-ping \
jq \
@@ -50,6 +49,10 @@ RUN apt-get update -y \
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
RUN adduser --disabled-password --gecos "" --uid $RUNNER_UID runner \
&& groupadd docker --gid $DOCKER_GID \
&& usermod -aG sudo runner \

View File

@@ -20,13 +20,16 @@ RUN apt-get update -y \
curl \
ca-certificates \
git \
git-lfs \
jq \
sudo \
unzip \
zip \
&& rm -rf /var/lib/apt/lists/*
# Download latest git-lfs version
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install -y --no-install-recommends git-lfs
RUN adduser --disabled-password --gecos "" --uid $RUNNER_USER_UID runner \
&& groupadd docker --gid $DOCKER_GROUP_GID \
&& usermod -aG sudo runner \