Compare commits

..

8 Commits

Author SHA1 Message Date
Nikola Jokic
ded39bede6 Prepare 0.12.1 release (#4153) 2025-06-27 13:49:47 +02:00
Nikola Jokic
9890c0592d Explicitly requeue during backoff ephemeral runner (#4152) 2025-06-27 12:05:43 +02:00
Nikola Jokic
3b5693eecb Remove check if runner exists after exit code 0 (#4142) 2025-06-27 11:11:39 +02:00
calx
e6e621a50a Remove duplicate float64 call (#4139) 2025-06-24 11:26:20 +02:00
Mark Huijgen
0b2534ebc9 Fix dind sidecar template (#4128) 2025-06-16 12:14:18 +02:00
Jeev B
e858d67926 Fix indentation of startupProbe attributes in dind sidecar (#4126) 2025-06-14 21:05:53 +02:00
Nikola Jokic
bc6c23609a Remove cache for build-push-action (#4124) 2025-06-13 15:26:55 +02:00
Nikola Jokic
666d0c52c4 Bump build-push-action to 6.18.0 (#4123) 2025-06-13 15:09:27 +02:00
14 changed files with 49 additions and 126 deletions

View File

@@ -36,8 +36,8 @@ runs:
driver-opts: image=moby/buildkit:v0.10.6
- name: Build controller image
# https://github.com/docker/build-push-action/releases/tag/v6.15.0
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
# https://github.com/docker/build-push-action/releases/tag/v6.18.0
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
platforms: linux/amd64

View File

@@ -16,7 +16,7 @@ env:
TARGET_ORG: actions-runner-controller
TARGET_REPO: arc_e2e_test_dummy
IMAGE_NAME: "arc-test-image"
IMAGE_VERSION: "0.12.0"
IMAGE_VERSION: "0.12.1"
concurrency:
# This will make sure we only apply the concurrency limits on pull requests

View File

@@ -93,8 +93,8 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build & push controller image
# https://github.com/docker/build-push-action/releases/tag/v6.15.0
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
# https://github.com/docker/build-push-action/releases/tag/v6.18.0
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
platforms: linux/amd64,linux/arm64
@@ -103,8 +103,6 @@ jobs:
tags: |
ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/gha-runner-scale-set-controller:${{ inputs.release_tag_name }}
ghcr.io/${{ steps.resolve_parameters.outputs.repository_owner }}/gha-runner-scale-set-controller:${{ inputs.release_tag_name }}-${{ steps.resolve_parameters.outputs.short_sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Job summary
run: |

View File

@@ -75,8 +75,8 @@ jobs:
version: latest
- name: Build controller image
# https://github.com/docker/build-push-action/releases/tag/v6.15.0
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
# https://github.com/docker/build-push-action/releases/tag/v6.18.0
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
if: steps.list-changed.outputs.changed == 'true'
with:
file: Dockerfile

View File

@@ -123,8 +123,8 @@ jobs:
# Unstable builds - run at your own risk
- name: Build and Push
# https://github.com/docker/build-push-action/releases/tag/v6.15.0
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
# https://github.com/docker/build-push-action/releases/tag/v6.18.0
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
context: .
file: ./Dockerfile

View File

@@ -15,13 +15,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.12.0
version: 0.12.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.12.0"
appVersion: "0.12.1"
home: https://github.com/actions/actions-runner-controller

View File

@@ -15,13 +15,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.12.0
version: 0.12.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.12.0"
appVersion: "0.12.1"
home: https://github.com/actions/actions-runner-controller

View File

@@ -113,9 +113,9 @@ startupProbe:
command:
- docker
- info
initialDelaySeconds: 0
failureThreshold: 24
periodSeconds: 5
initialDelaySeconds: 0
failureThreshold: 24
periodSeconds: 5
{{- end }}
volumeMounts:
- name: work

View File

@@ -166,11 +166,11 @@ spec:
initContainers:
{{- if eq $containerMode.type "dind" }}
- name: init-dind-externals
{{- include "gha-runner-scale-set.dind-init-container" . | nindent 8 }}
{{- end }}
{{- if (ge (.Capabilities.KubeVersion.Minor | int) 29) }}
{{- include "gha-runner-scale-set.dind-init-container" . | nindent 8 }}
{{- if (ge (.Capabilities.KubeVersion.Minor | int) 29) }}
- name: dind
{{- include "gha-runner-scale-set.dind-container" . | nindent 8 }}
{{- include "gha-runner-scale-set.dind-container" . | nindent 8 }}
{{- end }}
{{- end }}
{{- with .Values.template.spec.initContainers }}
{{- toYaml . | nindent 6 }}

View File

@@ -326,9 +326,16 @@ template:
## command:
## - docker
## - info
## initialDelaySeconds: 0
## failureThreshold: 24
## periodSeconds: 5
## initialDelaySeconds: 0
## failureThreshold: 24
## periodSeconds: 5
## volumeMounts:
## - name: work
## mountPath: /home/runner/_work
## - name: dind-sock
## mountPath: /var/run
## - name: dind-externals
## mountPath: /home/runner/externals
## containers:
## - name: runner
## image: ghcr.io/actions/actions-runner:latest

View File

@@ -468,7 +468,7 @@ func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
e.setGauge(MetricAssignedJobs, e.scaleSetLabels, float64(stats.TotalAssignedJobs))
e.setGauge(MetricRunningJobs, e.scaleSetLabels, float64(stats.TotalRunningJobs))
e.setGauge(MetricRegisteredRunners, e.scaleSetLabels, float64(stats.TotalRegisteredRunners))
e.setGauge(MetricBusyRunners, e.scaleSetLabels, float64(float64(stats.TotalBusyRunners)))
e.setGauge(MetricBusyRunners, e.scaleSetLabels, float64(stats.TotalBusyRunners))
e.setGauge(MetricIdleRunners, e.scaleSetLabels, float64(stats.TotalIdleRunners))
}

View File

@@ -201,12 +201,16 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
backoffDuration := failedRunnerBackoff[len(ephemeralRunner.Status.Failures)]
nextReconciliation := lastFailure.Add(backoffDuration)
if !lastFailure.IsZero() && now.Before(&metav1.Time{Time: nextReconciliation}) {
requeueAfter := nextReconciliation.Sub(now.Time)
log.Info("Backing off the next reconciliation due to failure",
"lastFailure", lastFailure,
"nextReconciliation", nextReconciliation,
"requeueAfter", nextReconciliation.Sub(now.Time),
"requeueAfter", requeueAfter,
)
return ctrl.Result{RequeueAfter: now.Sub(nextReconciliation)}, nil
return ctrl.Result{
Requeue: true,
RequeueAfter: requeueAfter,
}, nil
}
secret := new(corev1.Secret)
@@ -293,28 +297,10 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
return ctrl.Result{}, nil
default:
// pod succeeded. We double-check with the service if the runner exists.
// The reason is that image can potentially finish with status 0, but not pick up the job.
existsInService, err := r.runnerRegisteredWithService(ctx, ephemeralRunner.DeepCopy(), log)
if err != nil {
log.Error(err, "Failed to check if runner is registered with the service")
return ctrl.Result{}, err
}
if !existsInService {
// the runner does not exist in the service, so it must be done
log.Info("Ephemeral runner has finished since it does not exist in the service anymore")
if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil {
log.Error(err, "Failed to mark ephemeral runner as finished")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
// The runner still exists. This can happen if the pod exited with 0 but fails to start
log.Info("Ephemeral runner pod has finished, but the runner still exists in the service. Deleting the pod to restart it.")
if err := r.deletePodAsFailed(ctx, ephemeralRunner, pod, log); err != nil {
log.Error(err, "failed to delete a pod that still exists in the service")
default: // succeeded
log.Info("Ephemeral runner has finished successfully")
if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil {
log.Error(err, "Failed to mark ephemeral runner as finished")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
@@ -752,35 +738,6 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context,
return nil
}
// runnerRegisteredWithService checks if the runner is still registered with the service
// Returns found=false and err=nil if ephemeral runner does not exist in GitHub service and should be deleted
func (r EphemeralRunnerReconciler) runnerRegisteredWithService(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (found bool, err error) {
actionsClient, err := r.GetActionsService(ctx, runner)
if err != nil {
return false, fmt.Errorf("failed to get Actions client for ScaleSet: %w", err)
}
log.Info("Checking if runner exists in GitHub service", "runnerId", runner.Status.RunnerId)
_, err = actionsClient.GetRunner(ctx, int64(runner.Status.RunnerId))
if err != nil {
actionsError := &actions.ActionsError{}
if !errors.As(err, &actionsError) {
return false, err
}
if actionsError.StatusCode != http.StatusNotFound ||
!actionsError.IsException("AgentNotFoundException") {
return false, fmt.Errorf("failed to check if runner exists in GitHub service: %w", err)
}
log.Info("Runner does not exist in GitHub service", "runnerId", runner.Status.RunnerId)
return false, nil
}
log.Info("Runner exists in GitHub service", "runnerId", runner.Status.RunnerId)
return true, nil
}
func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) error {
client, err := r.GetActionsService(ctx, ephemeralRunner)
if err != nil {

View File

@@ -675,53 +675,6 @@ var _ = Describe("EphemeralRunner", func() {
).Should(BeEquivalentTo(true))
})
It("It should re-create pod on exit status 0, but runner exists within the service", func() {
pod := new(corev1.Pod)
Eventually(
func() (bool, error) {
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeEquivalentTo(true))
pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
Name: v1alpha1.EphemeralRunnerContainerName,
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 0,
},
},
})
err := k8sClient.Status().Update(ctx, pod)
Expect(err).To(BeNil(), "failed to update pod status")
updated := new(v1alpha1.EphemeralRunner)
Eventually(func() (bool, error) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated)
if err != nil {
return false, err
}
return len(updated.Status.Failures) == 1, nil
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true))
// should re-create after failure
Eventually(
func() (bool, error) {
pod := new(corev1.Pod)
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeEquivalentTo(true))
})
It("It should not set the phase to succeeded without pod termination status", func() {
pod := new(corev1.Pod)
Eventually(

View File

@@ -43,6 +43,14 @@ You can follow [this troubleshooting guide](https://docs.github.com/en/actions/h
## Changelog
### 0.12.1
1. Fix indentation of startupProbe attributes in dind sidecar [#4126](https://github.com/actions/actions-runner-controller/pull/4126)
1. Remove duplicate float64 call [#4139](https://github.com/actions/actions-runner-controller/pull/4139)
1. Fix dind sidecar template [#4128](https://github.com/actions/actions-runner-controller/pull/4128)
1. Remove check if runner exists after exit code 0 [#4142](https://github.com/actions/actions-runner-controller/pull/4142)
1. Explicitly requeue during backoff ephemeral runner [#4152](https://github.com/actions/actions-runner-controller/pull/4152)
### 0.12.0
1. Allow use of client id as an app id [#4057](https://github.com/actions/actions-runner-controller/pull/4057)