mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-11 03:57:01 +00:00
* Enhance RunnerSet to optionally retain PVs across restarts This is our initial attempt to bring back the ability to retain PVs across runner pod restarts when using RunnerSet. The implementation is composed of two new controllers, `runnerpersistentvolumeclaim-controller` and `runnerpersistentvolume-controller`. It all starts from our existing `runnerset-controller`: the controller now tries to mark any PVCs created by the StatefulSets it creates for the RunnerSet. Once the controller terminates those StatefulSets, their corresponding PVCs are cleaned up by `runnerpersistentvolumeclaim-controller`, and the PVs are then unbound from their corresponding PVCs by `runnerpersistentvolume-controller` so that they can be reused by future PVCs created for future StatefulSets that share the same StorageClass. Ref #1286 * Update E2E test suite to cover runner, docker, and go caching with RunnerSet + PVs Ref #1286
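A quick way to sanity-check the retain-and-reuse behavior described above once the manifest below is applied (standard kubectl commands; the namespace is illustrative):

  kubectl get pvc -n default   # PVCs created from the RunnerSet's volumeClaimTemplates
  kubectl get pv               # after runner pods restart, the PVs should be retained and become reusable rather than deleted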
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}
# In kind environments, the provisioner writes to:
#   /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner/PV_NAME
# This can grow to hundreds of gigabytes depending on what you cache in the test workflow. Beware of `no space left on device` errors!
# If you do encounter no-space errors, try:
#   docker system prune
#   docker buildx prune #=> frees up /var/lib/docker/volumes/buildx_buildkit_container-builder0_state
#   sudo rm -rf /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner #=> frees up local-path-provisioner's data
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}-var-lib-docker
  labels:
    content: ${NAME}-var-lib-docker
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}-cache
  labels:
    content: ${NAME}-cache
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}-runner-tool-cache
  labels:
    content: ${NAME}-runner-tool-cache
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerSet
metadata:
  name: ${NAME}
spec:
  # MANDATORY because it is based on StatefulSet: omitting it results in the error below:
  #   missing required field "selector" in dev.summerwind.actions.v1alpha1.RunnerSet.spec
  selector:
    matchLabels:
      app: ${NAME}

  # MANDATORY because it is based on StatefulSet: omitting it results in the error below:
  #   missing required field "serviceName" in dev.summerwind.actions.v1alpha1.RunnerSet.spec
  serviceName: ${NAME}

  #replicas: 1

  # From my limited testing, `ephemeral: true` is more reliable.
  # Sometimes, updating already deployed runners from `ephemeral: false` to `ephemeral: true` seems to
  # result in queued jobs hanging forever.
  ephemeral: ${TEST_EPHEMERAL}

  enterprise: ${TEST_ENTERPRISE}
  group: ${TEST_GROUP}
  organization: ${TEST_ORG}
  repository: ${TEST_REPO}

  #
  # Custom runner image
  #
  image: ${RUNNER_NAME}:${RUNNER_TAG}

  #
  # dockerd within runner container
  #
  ## Replace `mumoshu/actions-runner-dind:dev` with your dind image
  #dockerdWithinRunnerContainer: true
  dockerdWithinRunnerContainer: ${RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER}

  #
  # Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
  #
  #dockerMTU: 1450

  # Runner labels
  # labels:
  # - "mylabel 1"
  # - "mylabel 2"
  labels:
  - "${RUNNER_LABEL}"

  #
  # Non-standard working directory
  #
  # workDir: "/"

  template:
    metadata:
      labels:
        app: ${NAME}
    spec:
      containers:
      - name: runner
        imagePullPolicy: IfNotPresent
        env:
        - name: RUNNER_FEATURE_FLAG_EPHEMERAL
          value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
        - name: GOMODCACHE
          value: "/home/runner/.cache/go-mod"
        volumeMounts:
        # Cache docker image layers, in case dockerdWithinRunnerContainer=true
        - name: var-lib-docker
          mountPath: /var/lib/docker
        # Cache go modules and builds
        # - name: gocache
        #   # Run `go env | grep GOCACHE` to verify the path is correct for your env
        #   mountPath: /home/runner/.cache/go-build
        # - name: gomodcache
        #   # Run `go env | grep GOMODCACHE` to verify the path is correct for your env
        #   # mountPath: /home/runner/go/pkg/mod
        - name: cache
          # go: could not create module cache: stat /home/runner/.cache/go-mod: permission denied
          mountPath: "/home/runner/.cache"
        - name: runner-tool-cache
          # This corresponds to our runner image's default setting of RUNNER_TOOL_CACHE=/opt/hostedtoolcache.
          #
          # If you customize the envvar in both the runner and docker containers of the runner pod spec,
          # you'd need to change this mountPath accordingly.
          #
          # The tool cache directory is defined in actions/toolkit's tool-cache module:
          # https://github.com/actions/toolkit/blob/2f164000dcd42fb08287824a3bc3030dbed33687/packages/tool-cache/src/tool-cache.ts#L621-L638
          #
          # Many setup-* actions like setup-go use the tool-cache module to download and cache installed binaries:
          # https://github.com/actions/setup-go/blob/56a61c9834b4a4950dbbf4740af0b8a98c73b768/src/installer.ts#L144
          mountPath: "/opt/hostedtoolcache"
      # Valid only when dockerdWithinRunnerContainer=false
      - name: docker
        volumeMounts:
        # Cache docker image layers, in case dockerdWithinRunnerContainer=false
        - name: var-lib-docker
          mountPath: /var/lib/docker
        # image: mumoshu/actions-runner-dind:dev

        # For buildx cache
        - name: cache
          mountPath: "/home/runner/.cache"
  volumeClaimTemplates:
  - metadata:
      name: vol1
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Mi
      storageClassName: ${NAME}
      ## Not sure which provisioner supports auto-provisioning with a selector.
      ## At least the rancher local-path provisioner stopped with:
      ##   waiting for a volume to be created, either by external provisioner "rancher.io/local-path" or manually created by system administrator
      # selector:
      #   matchLabels:
      #     runnerset-volume-id: ${NAME}-vol1
  - metadata:
      name: vol2
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Mi
      storageClassName: ${NAME}
      # selector:
      #   matchLabels:
      #     runnerset-volume-id: ${NAME}-vol2
  - metadata:
      name: var-lib-docker
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Mi
      storageClassName: ${NAME}-var-lib-docker
  - metadata:
      name: cache
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Mi
      storageClassName: ${NAME}-cache
  - metadata:
      name: runner-tool-cache
      # It turns out labels don't distinguish PVs across PVCs, and the
      # end result is that PVs get reused by the wrong PVCs.
      # The correct way seems to be to differentiate the storage class per PVC template.
      # labels:
      #   id: runner-tool-cache
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 10Mi
      storageClassName: ${NAME}-runner-tool-cache
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
metadata:
  name: ${NAME}
spec:
  scaleTargetRef:
    kind: RunnerSet
    name: ${NAME}
  scaleUpTriggers:
  - githubEvent:
      workflowJob: {}
    amount: 1
    duration: "10m"
  minReplicas: ${RUNNER_MIN_REPLICAS}
  maxReplicas: 10
  scaleDownDelaySecondsAfterScaleOut: ${RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT}