mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-10 11:41:27 +00:00
Compare commits
270 Commits
actions-ru
...
v0.24.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3d88b9630a | ||
|
|
1152e6b31d | ||
|
|
ac27df8301 | ||
|
|
9dd26168d6 | ||
|
|
18bfb28c0b | ||
|
|
84210e900b | ||
|
|
ef3313d147 | ||
|
|
c7eea169ad | ||
|
|
63be0223ad | ||
|
|
5bbea772f7 | ||
|
|
2aa3f1e142 | ||
|
|
3e988afc09 | ||
|
|
84210f3d2b | ||
|
|
536692181b | ||
|
|
23403172cb | ||
|
|
8a8ec43364 | ||
|
|
78c01fd31d | ||
|
|
bf45aa9f6b | ||
|
|
b5aa1750bb | ||
|
|
cdc9d20e7a | ||
|
|
8035d6d9f8 | ||
|
|
65f7ee92a6 | ||
|
|
fca8a538db | ||
|
|
95ddc77245 | ||
|
|
b5194fd75a | ||
|
|
adf69bbea0 | ||
|
|
b43ef70ac6 | ||
|
|
f1caebbaf0 | ||
|
|
ede28f5046 | ||
|
|
f08ab1490d | ||
|
|
772ca57056 | ||
|
|
51b13e3bab | ||
|
|
81017b130f | ||
|
|
bdbcf66569 | ||
|
|
0e15a78541 | ||
|
|
f85c3d06d9 | ||
|
|
51ba7d7160 | ||
|
|
759349de11 | ||
|
|
3014e98681 | ||
|
|
5f4be6a883 | ||
|
|
b98f470a70 | ||
|
|
e46b90f758 | ||
|
|
3a7e8c844b | ||
|
|
65a67ee61c | ||
|
|
215ba36fd1 | ||
|
|
27774b47bd | ||
|
|
fbde2b9a41 | ||
|
|
212098183a | ||
|
|
4a5097d8cf | ||
|
|
9c57d085f8 | ||
|
|
d6622f9369 | ||
|
|
3b67ee727f | ||
|
|
e6bddcd238 | ||
|
|
f60e57d789 | ||
|
|
3ca1152420 | ||
|
|
e94fa19843 | ||
|
|
99832d7104 | ||
|
|
289bcd8b64 | ||
|
|
5e8cba82c2 | ||
|
|
dabbc99c78 | ||
|
|
d01595cfbc | ||
|
|
c1e5829b03 | ||
|
|
800d6bd586 | ||
|
|
d3b7f0bf7d | ||
|
|
dbcb67967f | ||
|
|
55369bf846 | ||
|
|
1f6303daed | ||
|
|
0fd1a681af | ||
|
|
58416db8c8 | ||
|
|
78a0817c2c | ||
|
|
9ed429513d | ||
|
|
46291c1823 | ||
|
|
832e59338e | ||
|
|
70ae5aef1f | ||
|
|
6d10dd8e1d | ||
|
|
61c5a112db | ||
|
|
7bc08fbe7c | ||
|
|
4053ab3e11 | ||
|
|
059481b610 | ||
|
|
9fdb2c009d | ||
|
|
9f7ea0c014 | ||
|
|
0caa0315c6 | ||
|
|
1c726ae20c | ||
|
|
d6cdd5964c | ||
|
|
a622968ff2 | ||
|
|
e8ef84ab76 | ||
|
|
1551f3b5fc | ||
|
|
3ba7179995 | ||
|
|
e7c6c26266 | ||
|
|
ebe7d060cb | ||
|
|
c3e280eadb | ||
|
|
9f254a2393 | ||
|
|
e5cf3b95cf | ||
|
|
24aae58dbc | ||
|
|
13bfa2da4e | ||
|
|
cb4e1fa8f2 | ||
|
|
7a5a6381c3 | ||
|
|
81951780b1 | ||
|
|
3b48db0d26 | ||
|
|
352e206148 | ||
|
|
6288036ed4 | ||
|
|
a37b4dfbe3 | ||
|
|
c4ff1a588f | ||
|
|
4a3b7bc8d5 | ||
|
|
8db071c4ba | ||
|
|
7b8057e417 | ||
|
|
960a704246 | ||
|
|
f907f82275 | ||
|
|
7124451cea | ||
|
|
c8f1acd92c | ||
|
|
b0fd7a75ea | ||
|
|
b09c54045a | ||
|
|
96f2da1c2e | ||
|
|
cac8b76c68 | ||
|
|
e24d942d63 | ||
|
|
b855991373 | ||
|
|
e7e48a77e4 | ||
|
|
85dea9b67c | ||
|
|
1d9347f418 | ||
|
|
631a70a35f | ||
|
|
b614dcf54b | ||
|
|
14f9e7229e | ||
|
|
82770e145b | ||
|
|
971c54bf5c | ||
|
|
b80d9b0cdc | ||
|
|
e46df413a1 | ||
|
|
eb02f6f26e | ||
|
|
7a750b9285 | ||
|
|
d26c8d6529 | ||
|
|
fd0092d13f | ||
|
|
88d17c7988 | ||
|
|
98567dadc9 | ||
|
|
7e8d80689b | ||
|
|
d72c396ff1 | ||
|
|
13e7b440a8 | ||
|
|
a95983fb98 | ||
|
|
ecc8b4472a | ||
|
|
459beeafb9 | ||
|
|
1b327a0721 | ||
|
|
1f8a23c129 | ||
|
|
af8d8f7e1d | ||
|
|
e7ef21fdf9 | ||
|
|
ee7484ac91 | ||
|
|
debf53c640 | ||
|
|
9657d3e5b3 | ||
|
|
2cb04ddde7 | ||
|
|
366f8927d8 | ||
|
|
532a2bb2a9 | ||
|
|
f28cecffe9 | ||
|
|
4cbbcd64ce | ||
|
|
a68eede616 | ||
|
|
c06a806d75 | ||
|
|
857c1700ba | ||
|
|
a40793bb60 | ||
|
|
48a7b78bf3 | ||
|
|
6ff93eae95 | ||
|
|
b25a0fd606 | ||
|
|
3beef84f30 | ||
|
|
76cc758d12 | ||
|
|
c4c6e833a7 | ||
|
|
ecf74e615e | ||
|
|
bb19e85037 | ||
|
|
e7200f274d | ||
|
|
1cc06e7408 | ||
|
|
4551309e30 | ||
|
|
7123b18a47 | ||
|
|
cc55d0bd7d | ||
|
|
c612e87d85 | ||
|
|
326d6a1fe8 | ||
|
|
fa8ff70aa2 | ||
|
|
efb7fca308 | ||
|
|
e4280dcb0d | ||
|
|
f153870f5f | ||
|
|
8ca39caff5 | ||
|
|
791634fb12 | ||
|
|
c4b24f8366 | ||
|
|
a1c6d1d11a | ||
|
|
adc889ce8a | ||
|
|
b83db7be8f | ||
|
|
da2adc0cc5 | ||
|
|
fa287c4395 | ||
|
|
7c0340dea0 | ||
|
|
c3dd1c5c05 | ||
|
|
051089733b | ||
|
|
757e0a82a2 | ||
|
|
83e550cde5 | ||
|
|
22ef7b3a71 | ||
|
|
28fccbcecd | ||
|
|
9628bb2937 | ||
|
|
736a53fed6 | ||
|
|
132faa13a1 | ||
|
|
66e070f798 | ||
|
|
55ff4de79a | ||
|
|
301439b06a | ||
|
|
15ee6d6360 | ||
|
|
5b899f578b | ||
|
|
d8c9eb7ba7 | ||
|
|
cbbc383a80 | ||
|
|
b57e885a73 | ||
|
|
bed927052d | ||
|
|
14a878bfae | ||
|
|
c95e84a528 | ||
|
|
95a5770d55 | ||
|
|
9cc9f8c182 | ||
|
|
b7c5611516 | ||
|
|
138e326705 | ||
|
|
c21fa75afa | ||
|
|
34483e268f | ||
|
|
5f2b5327f7 | ||
|
|
a93b2fdad4 | ||
|
|
25570a0c6d | ||
|
|
d20ad71071 | ||
|
|
8a379ac94b | ||
|
|
27563c4378 | ||
|
|
4a0f68bfe3 | ||
|
|
1917cf90c4 | ||
|
|
0ba3cad6c2 | ||
|
|
7f0e65cb73 | ||
|
|
12a04b7f38 | ||
|
|
a3072c110d | ||
|
|
15b402bb32 | ||
|
|
11be6c1fb6 | ||
|
|
59c3288e87 | ||
|
|
5030e075a9 | ||
|
|
3115d71471 | ||
|
|
c221b6e278 | ||
|
|
a8dbc8a501 | ||
|
|
b1ac63683f | ||
|
|
10bc28af75 | ||
|
|
e23692b3bc | ||
|
|
e7f4a0e200 | ||
|
|
828ddcd44e | ||
|
|
fc821fd473 | ||
|
|
4b0aa92286 | ||
|
|
c69c8dd84d | ||
|
|
e42db00006 | ||
|
|
eff0c7364f | ||
|
|
516695b275 | ||
|
|
686d40c20d | ||
|
|
f0fa99fc53 | ||
|
|
6b12413fdd | ||
|
|
3abecd0f19 | ||
|
|
7156ce040e | ||
|
|
1463d4927f | ||
|
|
5bc16f2619 | ||
|
|
b8e65aa857 | ||
|
|
d4a9750e20 | ||
|
|
a6f0e0008f | ||
|
|
79a31328a5 | ||
|
|
4e6bfd8114 | ||
|
|
3c16188371 | ||
|
|
9e356b419e | ||
|
|
f3ceccd904 | ||
|
|
4b557dc54c | ||
|
|
4c53e3aa75 | ||
|
|
0b9bef2c08 | ||
|
|
a5ed6bd263 | ||
|
|
921f547200 | ||
|
|
9079c5d85f | ||
|
|
a9aea0bd9c | ||
|
|
fcf4778bac | ||
|
|
eb0a4a9603 | ||
|
|
b6151ebb8d | ||
|
|
ba4bd7c0db | ||
|
|
5b92c412a4 | ||
|
|
e22d981d58 | ||
|
|
a7b39cc247 | ||
|
|
1e452358b4 | ||
|
|
92e133e007 | ||
|
|
d0d316252e |
@@ -11,3 +11,4 @@ charts
|
|||||||
*.md
|
*.md
|
||||||
*.txt
|
*.txt
|
||||||
*.sh
|
*.sh
|
||||||
|
test/e2e/.docker-build
|
||||||
|
|||||||
36
.github/ISSUE_TEMPLATE/bug_report.md
vendored
36
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -1,36 +0,0 @@
|
|||||||
---
|
|
||||||
name: Bug report
|
|
||||||
about: Create a report to help us improve
|
|
||||||
title: ''
|
|
||||||
assignees: ''
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
**Describe the bug**
|
|
||||||
A clear and concise description of what the bug is.
|
|
||||||
|
|
||||||
**Checks**
|
|
||||||
|
|
||||||
- [ ] My actions-runner-controller version (v0.x.y) does support the feature
|
|
||||||
- [ ] I'm using an unreleased version of the controller I built from HEAD of the default branch
|
|
||||||
|
|
||||||
**To Reproduce**
|
|
||||||
Steps to reproduce the behavior:
|
|
||||||
1. Go to '...'
|
|
||||||
2. Click on '....'
|
|
||||||
3. Scroll down to '....'
|
|
||||||
4. See error
|
|
||||||
|
|
||||||
**Expected behavior**
|
|
||||||
A clear and concise description of what you expected to happen.
|
|
||||||
|
|
||||||
**Screenshots**
|
|
||||||
If applicable, add screenshots to help explain your problem.
|
|
||||||
|
|
||||||
**Environment (please complete the following information):**
|
|
||||||
- Controller Version [e.g. 0.18.2]
|
|
||||||
- Deployment Method [e.g. Helm and Kustomize ]
|
|
||||||
- Helm Chart Version [e.g. 0.11.0, if applicable]
|
|
||||||
|
|
||||||
**Additional context**
|
|
||||||
Add any other context about the problem here.
|
|
||||||
160
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
160
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
name: Bug Report
|
||||||
|
description: File a bug report
|
||||||
|
title: "Bug"
|
||||||
|
labels: ["bug"]
|
||||||
|
body:
|
||||||
|
- type: input
|
||||||
|
id: controller-version
|
||||||
|
attributes:
|
||||||
|
label: Controller Version
|
||||||
|
description: Refer to semver-like release tags for controller versions. Any release tags prefixed with `actions-runner-controller-` are for chart releases
|
||||||
|
placeholder: ex. 0.18.2 or git commit ID
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: input
|
||||||
|
id: chart-version
|
||||||
|
attributes:
|
||||||
|
label: Helm Chart Version
|
||||||
|
description: Run `helm list` and see what's shown under CHART VERSION. Any release tags prefixed with `actions-runner-controller-` are for chart releases
|
||||||
|
placeholder: ex. 0.11.0
|
||||||
|
- type: dropdown
|
||||||
|
id: deployment-method
|
||||||
|
attributes:
|
||||||
|
label: Deployment Method
|
||||||
|
description: Which deployment method did you use to install ARC?
|
||||||
|
options:
|
||||||
|
- Helm
|
||||||
|
- Kustomize
|
||||||
|
- ArgoCD
|
||||||
|
- Other
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: checkboxes
|
||||||
|
id: checks
|
||||||
|
attributes:
|
||||||
|
label: Checks
|
||||||
|
description: Please check the boxes below before submitting
|
||||||
|
options:
|
||||||
|
- label: This isn't a question or user support case (For Q&A and community support, go to [Discussions](https://github.com/actions-runner-controller/actions-runner-controller/discussions). It might also be a good idea to contract with any of contributors and maintainers if your business is so critical and therefore you need priority support
|
||||||
|
required: true
|
||||||
|
- label: I've read [releasenotes](https://github.com/actions-runner-controller/actions-runner-controller/tree/master/docs/releasenotes) before submitting this issue and I'm sure it's not due to any recently-introduced backward-incompatible changes
|
||||||
|
required: true
|
||||||
|
- label: My actions-runner-controller version (v0.x.y) does support the feature
|
||||||
|
required: true
|
||||||
|
- label: I've already upgraded ARC to the latest and it didn't fix the issue
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: resource-definitions
|
||||||
|
attributes:
|
||||||
|
label: Resource Definitions
|
||||||
|
description: "Add copy(s) of your resource definition(s) (RunnerDeployment or RunnerSet, and HorizontalRunnerAutoscaler. If RunnerSet, also include the StorageClass being used)"
|
||||||
|
render: yaml
|
||||||
|
placeholder: |
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerDeployment
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
#snip
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
#snip
|
||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
provisioner: ...
|
||||||
|
reclaimPolicy: ...
|
||||||
|
volumeBindingMode: ...
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: HorizontalRunnerAutoscaler
|
||||||
|
metadata:
|
||||||
|
name:
|
||||||
|
spec:
|
||||||
|
#snip
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: reproduction-steps
|
||||||
|
attributes:
|
||||||
|
label: To Reproduce
|
||||||
|
description: "Steps to reproduce the behavior"
|
||||||
|
render: markdown
|
||||||
|
placeholder: |
|
||||||
|
1. Go to '...'
|
||||||
|
2. Click on '....'
|
||||||
|
3. Scroll down to '....'
|
||||||
|
4. See error
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: actual-behavior
|
||||||
|
attributes:
|
||||||
|
label: Describe the bug
|
||||||
|
description: Also tell us, what did happen?
|
||||||
|
placeholder: A clear and concise description of what happened.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: expected-behavior
|
||||||
|
attributes:
|
||||||
|
label: Describe the expected behavior
|
||||||
|
description: Also tell us, what did you expect to happen?
|
||||||
|
placeholder: A clear and concise description of what the expected behavior is.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: controller-logs
|
||||||
|
attributes:
|
||||||
|
label: Controller Logs
|
||||||
|
description: "Include logs from `actions-runner-controller`'s controller-manager pod"
|
||||||
|
render: shell
|
||||||
|
placeholder: |
|
||||||
|
To grab controller logs:
|
||||||
|
|
||||||
|
# Set NS according to your setup
|
||||||
|
NS=actions-runner-system
|
||||||
|
|
||||||
|
# Grab the pod name and set it to $POD_NAME
|
||||||
|
kubectl -n $NS get po
|
||||||
|
|
||||||
|
kubectl -n $NS logs $POD_NAME > arc.log
|
||||||
|
|
||||||
|
Upload it to e.g. https://gist.github.com/ and paste the link to it here.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: runner-pod-logs
|
||||||
|
attributes:
|
||||||
|
label: Runner Pod Logs
|
||||||
|
description: "Include logs from runner pod(s)"
|
||||||
|
render: shell
|
||||||
|
placeholder: |
|
||||||
|
To grab the runner pod logs:
|
||||||
|
|
||||||
|
# Set NS according to your setup. It should match your RunnerDeployment's metadata.namespace.
|
||||||
|
NS=default
|
||||||
|
|
||||||
|
# Grab the name of the problematic runner pod and set it to $POD_NAME
|
||||||
|
kubectl -n $NS get po
|
||||||
|
|
||||||
|
kubectl -n $NS logs $POD_NAME -c runner > runnerpod_runner.log
|
||||||
|
kubectl -n $NS logs $POD_NAME -c docker > runnerpod_docker.log
|
||||||
|
|
||||||
|
Upload it to e.g. https://gist.github.com/ and paste the link to it here.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: additional-context
|
||||||
|
attributes:
|
||||||
|
label: Additional Context
|
||||||
|
description: |
|
||||||
|
Add any other context about the problem here.
|
||||||
|
|
||||||
|
Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
|
||||||
15
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
15
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Blank issues are mainly for maintainers who are known to write complete issue descriptions without need to following a form
|
||||||
|
blank_issues_enabled: true
|
||||||
|
contact_links:
|
||||||
|
- name: Sponsor ARC Maintainers
|
||||||
|
about: If your business relies on the continued maintainance of actions-runner-controller, please consider sponsoring the project and the maintainers.
|
||||||
|
url: https://github.com/actions-runner-controller/actions-runner-controller/tree/master/CODEOWNERS
|
||||||
|
- name: Ideas and Feature Requests
|
||||||
|
about: Wanna request a feature? Create a discussion and collect :+1:s first.
|
||||||
|
url: https://github.com/actions-runner-controller/actions-runner-controller/discussions/new?category=ideas
|
||||||
|
- name: Questions and User Support
|
||||||
|
about: Need support using ARC? We use Discussions as the place to provide community support.
|
||||||
|
url: https://github.com/actions-runner-controller/actions-runner-controller/discussions/new?category=questions
|
||||||
|
- name: Need Paid Support?
|
||||||
|
about: Consider contracting with any of the actions-runner-controller maintainers and contributors.
|
||||||
|
url: https://github.com/actions-runner-controller/actions-runner-controller/tree/master/CODEOWNERS
|
||||||
@@ -29,22 +29,22 @@ runs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v1
|
uses: docker/setup-qemu-action@v2
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v1
|
uses: docker/setup-buildx-action@v2
|
||||||
with:
|
with:
|
||||||
version: latest
|
version: latest
|
||||||
|
|
||||||
- name: Login to DockerHub
|
- name: Login to DockerHub
|
||||||
if: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
if: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
||||||
uses: docker/login-action@v1
|
uses: docker/login-action@v2
|
||||||
with:
|
with:
|
||||||
username: ${{ inputs.username }}
|
username: ${{ inputs.username }}
|
||||||
password: ${{ inputs.password }}
|
password: ${{ inputs.password }}
|
||||||
|
|
||||||
- name: Login to GitHub Container Registry
|
- name: Login to GitHub Container Registry
|
||||||
uses: docker/login-action@v1
|
uses: docker/login-action@v2
|
||||||
if: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
if: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
|
|||||||
25
.github/lock.yml
vendored
25
.github/lock.yml
vendored
@@ -1,25 +0,0 @@
|
|||||||
# Configuration for Lock Threads
|
|
||||||
# Repo: https://github.com/dessant/lock-threads-app
|
|
||||||
# App: https://github.com/apps/lock
|
|
||||||
|
|
||||||
# Number of days of inactivity before a closed issue or pull request is locked
|
|
||||||
daysUntilLock: 7
|
|
||||||
|
|
||||||
# Skip issues and pull requests created before a given timestamp. Timestamp must
|
|
||||||
# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
|
|
||||||
skipCreatedBefore: false
|
|
||||||
|
|
||||||
# Issues and pull requests with these labels will be ignored. Set to `[]` to disable
|
|
||||||
exemptLabels: []
|
|
||||||
|
|
||||||
# Label to add before locking, such as `outdated`. Set to `false` to disable
|
|
||||||
lockLabel: false
|
|
||||||
|
|
||||||
# Comment to post before locking. Set to `false` to disable
|
|
||||||
lockComment: >
|
|
||||||
This thread has been automatically locked since there has not been
|
|
||||||
any recent activity after it was closed. Please open a new issue for
|
|
||||||
related bugs.
|
|
||||||
|
|
||||||
# Assign `resolved` as the reason for locking. Set to `false` to disable
|
|
||||||
setLockReason: true
|
|
||||||
22
.github/renovate.json5
vendored
22
.github/renovate.json5
vendored
@@ -14,10 +14,28 @@
|
|||||||
// use https://github.com/actions/runner/releases
|
// use https://github.com/actions/runner/releases
|
||||||
"fileMatch": [
|
"fileMatch": [
|
||||||
".github/workflows/runners.yml"
|
".github/workflows/runners.yml"
|
||||||
],
|
],
|
||||||
"matchStrings": ["RUNNER_VERSION: +(?<currentValue>.*?)\\n"],
|
"matchStrings": ["RUNNER_VERSION: +(?<currentValue>.*?)\\n"],
|
||||||
"depNameTemplate": "actions/runner",
|
"depNameTemplate": "actions/runner",
|
||||||
"datasourceTemplate": "github-releases"
|
"datasourceTemplate": "github-releases"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fileMatch": [
|
||||||
|
"runner/Makefile",
|
||||||
|
"Makefile"
|
||||||
|
],
|
||||||
|
"matchStrings": ["RUNNER_VERSION \\?= +(?<currentValue>.*?)\\n"],
|
||||||
|
"depNameTemplate": "actions/runner",
|
||||||
|
"datasourceTemplate": "github-releases"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fileMatch": [
|
||||||
|
"runner/actions-runner.dockerfile",
|
||||||
|
"runner/actions-runner-dind.dockerfile"
|
||||||
|
],
|
||||||
|
"matchStrings": ["RUNNER_VERSION=+(?<currentValue>.*?)\\n"],
|
||||||
|
"depNameTemplate": "actions/runner",
|
||||||
|
"datasourceTemplate": "github-releases"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
67
.github/stale.yml
vendored
67
.github/stale.yml
vendored
@@ -1,67 +0,0 @@
|
|||||||
# Configuration for probot-stale - https://github.com/probot/stale
|
|
||||||
|
|
||||||
# Number of days of inactivity before an Issue or Pull Request becomes stale
|
|
||||||
daysUntilStale: 30
|
|
||||||
|
|
||||||
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
|
|
||||||
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
|
|
||||||
daysUntilClose: 14
|
|
||||||
|
|
||||||
# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
|
|
||||||
onlyLabels: []
|
|
||||||
|
|
||||||
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
|
|
||||||
exemptLabels:
|
|
||||||
- pinned
|
|
||||||
- security
|
|
||||||
- enhancement
|
|
||||||
- refactor
|
|
||||||
- documentation
|
|
||||||
- chore
|
|
||||||
- bug
|
|
||||||
- dependencies
|
|
||||||
- needs-investigation
|
|
||||||
|
|
||||||
# Set to true to ignore issues in a project (defaults to false)
|
|
||||||
exemptProjects: false
|
|
||||||
|
|
||||||
# Set to true to ignore issues in a milestone (defaults to false)
|
|
||||||
exemptMilestones: false
|
|
||||||
|
|
||||||
# Set to true to ignore issues with an assignee (defaults to false)
|
|
||||||
exemptAssignees: false
|
|
||||||
|
|
||||||
# Label to use when marking as stale
|
|
||||||
staleLabel: stale
|
|
||||||
|
|
||||||
# Comment to post when marking as stale. Set to `false` to disable
|
|
||||||
markComment: >
|
|
||||||
This issue has been automatically marked as stale because it has not had
|
|
||||||
recent activity. It will be closed if no further activity occurs. Thank you
|
|
||||||
for your contributions.
|
|
||||||
|
|
||||||
# Comment to post when removing the stale label.
|
|
||||||
# unmarkComment: >
|
|
||||||
# Your comment here.
|
|
||||||
|
|
||||||
# Comment to post when closing a stale Issue or Pull Request.
|
|
||||||
# closeComment: >
|
|
||||||
# Your comment here.
|
|
||||||
|
|
||||||
# Limit the number of actions per hour, from 1-30. Default is 30
|
|
||||||
limitPerRun: 30
|
|
||||||
|
|
||||||
# Limit to only `issues` or `pulls`
|
|
||||||
# only: issues
|
|
||||||
|
|
||||||
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
|
|
||||||
# pulls:
|
|
||||||
# daysUntilStale: 30
|
|
||||||
# markComment: >
|
|
||||||
# This pull request has been automatically marked as stale because it has not had
|
|
||||||
# recent activity. It will be closed if no further activity occurs. Thank you
|
|
||||||
# for your contributions.
|
|
||||||
|
|
||||||
# issues:
|
|
||||||
# exemptLabels:
|
|
||||||
# - confirmed
|
|
||||||
26
.github/workflows/codeql.yml
vendored
Normal file
26
.github/workflows/codeql.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
name: "Code Scanning"
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master]
|
||||||
|
pull_request:
|
||||||
|
branches: [master]
|
||||||
|
schedule:
|
||||||
|
- cron: '30 1 * * 0'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
CodeQL-Build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
security-events: write
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3.0.2
|
||||||
|
- name: Initialize CodeQL
|
||||||
|
uses: github/codeql-action/init@v2.1.11
|
||||||
|
with:
|
||||||
|
languages: go
|
||||||
|
- name: Autobuild
|
||||||
|
uses: github/codeql-action/autobuild@v2.1.11
|
||||||
|
- name: Perform CodeQL Analysis
|
||||||
|
uses: github/codeql-action/analyze@v2.1.11
|
||||||
15
.github/workflows/on-push-lint-charts.yml
vendored
15
.github/workflows/on-push-lint-charts.yml
vendored
@@ -10,7 +10,10 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
env:
|
env:
|
||||||
KUBE_SCORE_VERSION: 1.10.0
|
KUBE_SCORE_VERSION: 1.10.0
|
||||||
HELM_VERSION: v3.4.1
|
HELM_VERSION: v3.8.0
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
lint-test:
|
lint-test:
|
||||||
@@ -18,12 +21,12 @@ jobs:
|
|||||||
name: Lint Chart
|
name: Lint Chart
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Helm
|
- name: Set up Helm
|
||||||
uses: azure/setup-helm@v1
|
uses: azure/setup-helm@217bf70cbd2e930ba2e81ba7e1de2f7faecc42ba
|
||||||
with:
|
with:
|
||||||
version: ${{ env.HELM_VERSION }}
|
version: ${{ env.HELM_VERSION }}
|
||||||
|
|
||||||
@@ -44,12 +47,12 @@ jobs:
|
|||||||
--enable-optional-test container-security-context-readonlyrootfilesystem
|
--enable-optional-test container-security-context-readonlyrootfilesystem
|
||||||
|
|
||||||
# python is a requirement for the chart-testing action below (supports yamllint among other tests)
|
# python is a requirement for the chart-testing action below (supports yamllint among other tests)
|
||||||
- uses: actions/setup-python@v2
|
- uses: actions/setup-python@fff15a21cc8b16191cb1249f621fa3a55b9005b8
|
||||||
with:
|
with:
|
||||||
python-version: 3.7
|
python-version: 3.7
|
||||||
|
|
||||||
- name: Set up chart-testing
|
- name: Set up chart-testing
|
||||||
uses: helm/chart-testing-action@v2.2.0
|
uses: helm/chart-testing-action@62a185010be4cb08459f7acb19f37927235d5cf3
|
||||||
|
|
||||||
- name: Run chart-testing (list-changed)
|
- name: Run chart-testing (list-changed)
|
||||||
id: list-changed
|
id: list-changed
|
||||||
@@ -63,7 +66,7 @@ jobs:
|
|||||||
run: ct lint --config charts/.ci/ct-config.yaml
|
run: ct lint --config charts/.ci/ct-config.yaml
|
||||||
|
|
||||||
- name: Create kind cluster
|
- name: Create kind cluster
|
||||||
uses: helm/kind-action@v1.2.0
|
uses: helm/kind-action@94729529f85113b88f4f819c17ce61382e6d8478
|
||||||
if: steps.list-changed.outputs.changed == 'true'
|
if: steps.list-changed.outputs.changed == 'true'
|
||||||
|
|
||||||
# We need cert-manager already installed in the cluster because we assume the CRDs exist
|
# We need cert-manager already installed in the cluster because we assume the CRDs exist
|
||||||
|
|||||||
@@ -13,7 +13,10 @@ on:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
KUBE_SCORE_VERSION: 1.10.0
|
KUBE_SCORE_VERSION: 1.10.0
|
||||||
HELM_VERSION: v3.4.1
|
HELM_VERSION: v3.8.0
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
lint-chart:
|
lint-chart:
|
||||||
@@ -23,12 +26,12 @@ jobs:
|
|||||||
publish-chart: ${{ steps.publish-chart-step.outputs.publish }}
|
publish-chart: ${{ steps.publish-chart-step.outputs.publish }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Helm
|
- name: Set up Helm
|
||||||
uses: azure/setup-helm@v1
|
uses: azure/setup-helm@217bf70cbd2e930ba2e81ba7e1de2f7faecc42ba
|
||||||
with:
|
with:
|
||||||
version: ${{ env.HELM_VERSION }}
|
version: ${{ env.HELM_VERSION }}
|
||||||
|
|
||||||
@@ -49,12 +52,12 @@ jobs:
|
|||||||
--enable-optional-test container-security-context-readonlyrootfilesystem
|
--enable-optional-test container-security-context-readonlyrootfilesystem
|
||||||
|
|
||||||
# python is a requirement for the chart-testing action below (supports yamllint among other tests)
|
# python is a requirement for the chart-testing action below (supports yamllint among other tests)
|
||||||
- uses: actions/setup-python@v2
|
- uses: actions/setup-python@fff15a21cc8b16191cb1249f621fa3a55b9005b8
|
||||||
with:
|
with:
|
||||||
python-version: 3.7
|
python-version: 3.7
|
||||||
|
|
||||||
- name: Set up chart-testing
|
- name: Set up chart-testing
|
||||||
uses: helm/chart-testing-action@v2.2.0
|
uses: helm/chart-testing-action@62a185010be4cb08459f7acb19f37927235d5cf3
|
||||||
|
|
||||||
- name: Run chart-testing (list-changed)
|
- name: Run chart-testing (list-changed)
|
||||||
id: list-changed
|
id: list-changed
|
||||||
@@ -68,7 +71,7 @@ jobs:
|
|||||||
run: ct lint --config charts/.ci/ct-config.yaml
|
run: ct lint --config charts/.ci/ct-config.yaml
|
||||||
|
|
||||||
- name: Create kind cluster
|
- name: Create kind cluster
|
||||||
uses: helm/kind-action@v1.2.0
|
uses: helm/kind-action@94729529f85113b88f4f819c17ce61382e6d8478
|
||||||
if: steps.list-changed.outputs.changed == 'true'
|
if: steps.list-changed.outputs.changed == 'true'
|
||||||
|
|
||||||
# We need cert-manager already installed in the cluster because we assume the CRDs exist
|
# We need cert-manager already installed in the cluster because we assume the CRDs exist
|
||||||
@@ -97,6 +100,8 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
publish-chart:
|
publish-chart:
|
||||||
|
permissions:
|
||||||
|
contents: write # for helm/chart-releaser-action to push chart release and create a release
|
||||||
if: needs.lint-chart.outputs.publish-chart == 'true'
|
if: needs.lint-chart.outputs.publish-chart == 'true'
|
||||||
needs: lint-chart
|
needs: lint-chart
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -104,7 +109,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
@@ -114,7 +119,7 @@ jobs:
|
|||||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||||
|
|
||||||
- name: Run chart-releaser
|
- name: Run chart-releaser
|
||||||
uses: helm/chart-releaser-action@v1.3.0
|
uses: helm/chart-releaser-action@a3454e46a6f5ac4811069a381e646961dda2e1bf
|
||||||
env:
|
env:
|
||||||
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
|
|
||||||
|
|||||||
14
.github/workflows/release.yml
vendored
14
.github/workflows/release.yml
vendored
@@ -16,7 +16,11 @@ jobs:
|
|||||||
run: echo ::set-output name=sha_short::${GITHUB_SHA::7}
|
run: echo ::set-output name=sha_short::${GITHUB_SHA::7}
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
|
|
||||||
|
- uses: actions/setup-go@193b404f8a1d1dccaf6ed9bf03cdb68d2d02020f
|
||||||
|
with:
|
||||||
|
go-version: '1.18.2'
|
||||||
|
|
||||||
- name: Install tools
|
- name: Install tools
|
||||||
run: |
|
run: |
|
||||||
@@ -38,22 +42,22 @@ jobs:
|
|||||||
run: make github-release
|
run: make github-release
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v1
|
uses: docker/setup-qemu-action@0522dcd2bf084920c411162fde334a308be75015
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
id: buildx
|
id: buildx
|
||||||
uses: docker/setup-buildx-action@v1
|
uses: docker/setup-buildx-action@91cb32d715c128e5f0ede915cd7e196ab7799b83
|
||||||
with:
|
with:
|
||||||
version: latest
|
version: latest
|
||||||
|
|
||||||
- name: Login to DockerHub
|
- name: Login to DockerHub
|
||||||
uses: docker/login-action@v1
|
uses: docker/login-action@d398f07826957cd0a18ea1b059cf1207835e60bc
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKER_USER }}
|
username: ${{ secrets.DOCKER_USER }}
|
||||||
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
||||||
|
|
||||||
- name: Build and Push
|
- name: Build and Push
|
||||||
uses: docker/build-push-action@v2
|
uses: docker/build-push-action@c5e6528d5ddefc82f682165021e05edf58044bce
|
||||||
with:
|
with:
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|||||||
15
.github/workflows/runners.yml
vendored
15
.github/workflows/runners.yml
vendored
@@ -2,7 +2,7 @@ name: Runners
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
types:
|
types:
|
||||||
- opened
|
- opened
|
||||||
- synchronize
|
- synchronize
|
||||||
- reopened
|
- reopened
|
||||||
@@ -11,11 +11,12 @@ on:
|
|||||||
- 'master'
|
- 'master'
|
||||||
paths:
|
paths:
|
||||||
- 'runner/**'
|
- 'runner/**'
|
||||||
|
- '!runner/Makefile'
|
||||||
- .github/workflows/runners.yml
|
- .github/workflows/runners.yml
|
||||||
- '!**.md'
|
- '!**.md'
|
||||||
|
|
||||||
env:
|
env:
|
||||||
RUNNER_VERSION: 2.287.1
|
RUNNER_VERSION: 2.292.0
|
||||||
DOCKER_VERSION: 20.10.12
|
DOCKER_VERSION: 20.10.12
|
||||||
DOCKERHUB_USERNAME: summerwind
|
DOCKERHUB_USERNAME: summerwind
|
||||||
|
|
||||||
@@ -33,30 +34,28 @@ jobs:
|
|||||||
- name: actions-runner
|
- name: actions-runner
|
||||||
os-name: ubuntu
|
os-name: ubuntu
|
||||||
os-version: 20.04
|
os-version: 20.04
|
||||||
dockerfile: Dockerfile
|
|
||||||
- name: actions-runner-dind
|
- name: actions-runner-dind
|
||||||
os-name: ubuntu
|
os-name: ubuntu
|
||||||
os-version: 20.04
|
os-version: 20.04
|
||||||
dockerfile: Dockerfile.dindrunner
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
|
|
||||||
- name: Setup Docker Environment
|
- name: Setup Docker Environment
|
||||||
id: vars
|
id: vars
|
||||||
uses: ./.github/actions/setup-docker-environment
|
uses: ./.github/actions/setup-docker-environment
|
||||||
with:
|
with:
|
||||||
username: ${{ env.DOCKERHUB_USERNAME }}
|
username: ${{ env.DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
||||||
ghcr_username: ${{ github.actor }}
|
ghcr_username: ${{ github.actor }}
|
||||||
ghcr_password: ${{ secrets.GITHUB_TOKEN }}
|
ghcr_password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Build and Push Versioned Tags
|
- name: Build and Push Versioned Tags
|
||||||
uses: docker/build-push-action@v2
|
uses: docker/build-push-action@c5e6528d5ddefc82f682165021e05edf58044bce
|
||||||
with:
|
with:
|
||||||
context: ./runner
|
context: ./runner
|
||||||
file: ./runner/${{ matrix.dockerfile }}
|
file: ./runner/${{ matrix.name }}.dockerfile
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64,linux/arm64
|
||||||
push: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
push: ${{ github.ref == 'master' && github.event.pull_request.merged == true }}
|
||||||
build-args: |
|
build-args: |
|
||||||
|
|||||||
25
.github/workflows/stale.yaml
vendored
Normal file
25
.github/workflows/stale.yaml
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
name: 'Close stale issues and PRs'
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# 01:30 every day
|
||||||
|
- cron: '30 1 * * *'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
stale:
|
||||||
|
permissions:
|
||||||
|
issues: write # for actions/stale to close stale issues
|
||||||
|
pull-requests: write # for actions/stale to close stale PRs
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/stale@65d24b70926a596b0f0098d7e1eb572175d73bc1
|
||||||
|
with:
|
||||||
|
stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
|
||||||
|
# turn off stale for both issues and PRs
|
||||||
|
days-before-stale: -1
|
||||||
|
# turn stale back on for issues only
|
||||||
|
days-before-issue-stale: 30
|
||||||
|
days-before-issue-close: 14
|
||||||
|
exempt-issue-labels: 'pinned,security,enhancement,refactor,documentation,chore,bug,dependencies,needs-investigation'
|
||||||
8
.github/workflows/test-entrypoint.yaml
vendored
8
.github/workflows/test-entrypoint.yaml
vendored
@@ -9,14 +9,16 @@ on:
|
|||||||
- 'test/entrypoint/**'
|
- 'test/entrypoint/**'
|
||||||
- '!**.md'
|
- '!**.md'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: Test entrypoint
|
name: Test entrypoint
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
- name: Run unit tests for entrypoint.sh
|
- name: Run unit tests for entrypoint.sh
|
||||||
run: |
|
run: |
|
||||||
cd test/entrypoint
|
make acceptance/runner/entrypoint
|
||||||
bash entrypoint_unittest.sh
|
|
||||||
|
|||||||
16
.github/workflows/test.yaml
vendored
16
.github/workflows/test.yaml
vendored
@@ -15,17 +15,27 @@ on:
|
|||||||
- '**.md'
|
- '**.md'
|
||||||
- '.gitignore'
|
- '.gitignore'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: Test
|
name: Test
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
- uses: actions/setup-go@v2
|
- uses: actions/setup-go@193b404f8a1d1dccaf6ed9bf03cdb68d2d02020f
|
||||||
with:
|
with:
|
||||||
go-version: '^1.17.5'
|
go-version: '1.18.2'
|
||||||
|
check-latest: false
|
||||||
- run: go version
|
- run: go version
|
||||||
|
- uses: actions/cache@95f200e41cfa87b8e07f30196c0df17a67e67786
|
||||||
|
with:
|
||||||
|
path: ~/go/pkg/mod
|
||||||
|
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
|
||||||
|
restore-keys: |
|
||||||
|
${{ runner.os }}-go-
|
||||||
- name: Install kubebuilder
|
- name: Install kubebuilder
|
||||||
run: |
|
run: |
|
||||||
curl -L -O https://github.com/kubernetes-sigs/kubebuilder/releases/download/v2.3.2/kubebuilder_2.3.2_linux_amd64.tar.gz
|
curl -L -O https://github.com/kubernetes-sigs/kubebuilder/releases/download/v2.3.2/kubebuilder_2.3.2_linux_amd64.tar.gz
|
||||||
|
|||||||
13
.github/workflows/wip.yml
vendored
13
.github/workflows/wip.yml
vendored
@@ -14,6 +14,9 @@ on:
|
|||||||
- "**.md"
|
- "**.md"
|
||||||
- ".gitignore"
|
- ".gitignore"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -22,19 +25,19 @@ jobs:
|
|||||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKER_USER }}
|
DOCKERHUB_USERNAME: ${{ secrets.DOCKER_USER }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v1
|
uses: docker/setup-qemu-action@0522dcd2bf084920c411162fde334a308be75015
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
id: buildx
|
id: buildx
|
||||||
uses: docker/setup-buildx-action@v1
|
uses: docker/setup-buildx-action@91cb32d715c128e5f0ede915cd7e196ab7799b83
|
||||||
with:
|
with:
|
||||||
version: latest
|
version: latest
|
||||||
|
|
||||||
- name: Login to DockerHub
|
- name: Login to DockerHub
|
||||||
uses: docker/login-action@v1
|
uses: docker/login-action@d398f07826957cd0a18ea1b059cf1207835e60bc
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKER_USER }}
|
username: ${{ secrets.DOCKER_USER }}
|
||||||
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
||||||
@@ -42,7 +45,7 @@ jobs:
|
|||||||
# Considered unstable builds
|
# Considered unstable builds
|
||||||
# See Issue #285, PR #286, and PR #323 for more information
|
# See Issue #285, PR #286, and PR #323 for more information
|
||||||
- name: Build and Push
|
- name: Build and Push
|
||||||
uses: docker/build-push-action@v2
|
uses: docker/build-push-action@c5e6528d5ddefc82f682165021e05edf58044bce
|
||||||
with:
|
with:
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|||||||
2
CODEOWNERS
Normal file
2
CODEOWNERS
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# actions-runner-controller maintainers
|
||||||
|
* @mumoshu @toast-gear
|
||||||
43
Dockerfile
43
Dockerfile
@@ -1,29 +1,44 @@
|
|||||||
# Build the manager binary
|
# Build the manager binary
|
||||||
FROM golang:1.17 as builder
|
FROM --platform=$BUILDPLATFORM golang:1.18.2 as builder
|
||||||
|
|
||||||
ARG TARGETPLATFORM
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
ENV GO111MODULE=on \
|
# Make it runnable on a distroless image/without libc
|
||||||
CGO_ENABLED=0
|
ENV CGO_ENABLED=0
|
||||||
|
|
||||||
# Copy the Go Modules manifests
|
# Copy the Go Modules manifests
|
||||||
COPY go.mod go.sum ./
|
COPY go.mod go.sum ./
|
||||||
|
|
||||||
# cache deps before building and copying source so that we don't need to re-download as much
|
# cache deps before building and copying source so that we don't need to re-download as much
|
||||||
# and so that source changes don't invalidate our downloaded layer
|
# and so that source changes don't invalidate our downloaded layer.
|
||||||
|
#
|
||||||
|
# Also, we need to do this before setting TARGETPLATFORM/TARGETOS/TARGETARCH/TARGETVARIANT
|
||||||
|
# so that go mod cache is shared across platforms.
|
||||||
RUN go mod download
|
RUN go mod download
|
||||||
|
|
||||||
# Copy the go source
|
# Copy the go source
|
||||||
COPY . .
|
# COPY . .
|
||||||
|
|
||||||
|
# Usage:
|
||||||
|
# docker buildx build --tag repo/img:tag -f ./Dockerfile . --platform linux/amd64,linux/arm64,linux/arm/v7
|
||||||
|
#
|
||||||
|
# With the above commmand,
|
||||||
|
# TARGETOS can be "linux", TARGETARCH can be "amd64", "arm64", and "arm", TARGETVARIANT can be "v7".
|
||||||
|
|
||||||
|
ARG TARGETPLATFORM TARGETOS TARGETARCH TARGETVARIANT
|
||||||
|
|
||||||
|
# We intentionally avoid `--mount=type=cache,mode=0777,target=/go/pkg/mod` in the `go mod download` and the `go build` runs
|
||||||
|
# to avoid https://github.com/moby/buildkit/issues/2334
|
||||||
|
# We can use docker layer cache so the build is fast enogh anyway
|
||||||
|
# We also use per-platform GOCACHE for the same reason.
|
||||||
|
env GOCACHE /build/${TARGETPLATFORM}/root/.cache/go-build
|
||||||
|
|
||||||
# Build
|
# Build
|
||||||
RUN export GOOS=$(echo ${TARGETPLATFORM} | cut -d / -f1) && \
|
RUN --mount=target=. \
|
||||||
export GOARCH=$(echo ${TARGETPLATFORM} | cut -d / -f2) && \
|
--mount=type=cache,mode=0777,target=${GOCACHE} \
|
||||||
GOARM=$(echo ${TARGETPLATFORM} | cut -d / -f3 | cut -c2-) && \
|
export GOOS=${TARGETOS} GOARCH=${TARGETARCH} GOARM=${TARGETVARIANT#v} && \
|
||||||
go build -a -o manager main.go && \
|
go build -o /out/manager main.go && \
|
||||||
go build -a -o github-webhook-server ./cmd/githubwebhookserver
|
go build -o /out/github-webhook-server ./cmd/githubwebhookserver
|
||||||
|
|
||||||
# Use distroless as minimal base image to package the manager binary
|
# Use distroless as minimal base image to package the manager binary
|
||||||
# Refer to https://github.com/GoogleContainerTools/distroless for more details
|
# Refer to https://github.com/GoogleContainerTools/distroless for more details
|
||||||
@@ -31,8 +46,8 @@ FROM gcr.io/distroless/static:nonroot
|
|||||||
|
|
||||||
WORKDIR /
|
WORKDIR /
|
||||||
|
|
||||||
COPY --from=builder /workspace/manager .
|
COPY --from=builder /out/manager .
|
||||||
COPY --from=builder /workspace/github-webhook-server .
|
COPY --from=builder /out/github-webhook-server .
|
||||||
|
|
||||||
USER nonroot:nonroot
|
USER nonroot:nonroot
|
||||||
|
|
||||||
|
|||||||
21
Makefile
21
Makefile
@@ -5,6 +5,7 @@ else
|
|||||||
endif
|
endif
|
||||||
DOCKER_USER ?= $(shell echo ${NAME} | cut -d / -f1)
|
DOCKER_USER ?= $(shell echo ${NAME} | cut -d / -f1)
|
||||||
VERSION ?= latest
|
VERSION ?= latest
|
||||||
|
RUNNER_VERSION ?= 2.292.0
|
||||||
TARGETPLATFORM ?= $(shell arch)
|
TARGETPLATFORM ?= $(shell arch)
|
||||||
RUNNER_NAME ?= ${DOCKER_USER}/actions-runner
|
RUNNER_NAME ?= ${DOCKER_USER}/actions-runner
|
||||||
RUNNER_TAG ?= ${VERSION}
|
RUNNER_TAG ?= ${VERSION}
|
||||||
@@ -12,9 +13,8 @@ TEST_REPO ?= ${DOCKER_USER}/actions-runner-controller
|
|||||||
TEST_ORG ?=
|
TEST_ORG ?=
|
||||||
TEST_ORG_REPO ?=
|
TEST_ORG_REPO ?=
|
||||||
TEST_EPHEMERAL ?= false
|
TEST_EPHEMERAL ?= false
|
||||||
SYNC_PERIOD ?= 5m
|
SYNC_PERIOD ?= 1m
|
||||||
USE_RUNNERSET ?=
|
USE_RUNNERSET ?=
|
||||||
RUNNER_FEATURE_FLAG_EPHEMERAL ?=
|
|
||||||
KUBECONTEXT ?= kind-acceptance
|
KUBECONTEXT ?= kind-acceptance
|
||||||
CLUSTER ?= acceptance
|
CLUSTER ?= acceptance
|
||||||
CERT_MANAGER_VERSION ?= v1.1.1
|
CERT_MANAGER_VERSION ?= v1.1.1
|
||||||
@@ -56,6 +56,7 @@ GO_TEST_ARGS ?= -short
|
|||||||
# Run tests
|
# Run tests
|
||||||
test: generate fmt vet manifests
|
test: generate fmt vet manifests
|
||||||
go test $(GO_TEST_ARGS) ./... -coverprofile cover.out
|
go test $(GO_TEST_ARGS) ./... -coverprofile cover.out
|
||||||
|
go test -fuzz=Fuzz -fuzztime=10s -run=Fuzz* ./controllers
|
||||||
|
|
||||||
test-with-deps: kube-apiserver etcd kubectl
|
test-with-deps: kube-apiserver etcd kubectl
|
||||||
# See https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/envtest#pkg-constants
|
# See https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/envtest#pkg-constants
|
||||||
@@ -109,13 +110,9 @@ vet:
|
|||||||
generate: controller-gen
|
generate: controller-gen
|
||||||
$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."
|
$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."
|
||||||
|
|
||||||
# Build the docker image
|
|
||||||
docker-build:
|
|
||||||
docker build -t ${NAME}:${VERSION} .
|
|
||||||
docker build -t ${RUNNER_NAME}:${RUNNER_TAG} --build-arg TARGETPLATFORM=${TARGETPLATFORM} runner
|
|
||||||
|
|
||||||
docker-buildx:
|
docker-buildx:
|
||||||
export DOCKER_CLI_EXPERIMENTAL=enabled
|
export DOCKER_CLI_EXPERIMENTAL=enabled ;\
|
||||||
|
export DOCKER_BUILDKIT=1
|
||||||
@if ! docker buildx ls | grep -q container-builder; then\
|
@if ! docker buildx ls | grep -q container-builder; then\
|
||||||
docker buildx create --platform ${PLATFORMS} --name container-builder --use;\
|
docker buildx create --platform ${PLATFORMS} --name container-builder --use;\
|
||||||
fi
|
fi
|
||||||
@@ -191,12 +188,14 @@ acceptance/deploy:
|
|||||||
TEST_ORG=${TEST_ORG} TEST_ORG_REPO=${TEST_ORG_REPO} SYNC_PERIOD=${SYNC_PERIOD} \
|
TEST_ORG=${TEST_ORG} TEST_ORG_REPO=${TEST_ORG_REPO} SYNC_PERIOD=${SYNC_PERIOD} \
|
||||||
USE_RUNNERSET=${USE_RUNNERSET} \
|
USE_RUNNERSET=${USE_RUNNERSET} \
|
||||||
TEST_EPHEMERAL=${TEST_EPHEMERAL} \
|
TEST_EPHEMERAL=${TEST_EPHEMERAL} \
|
||||||
RUNNER_FEATURE_FLAG_EPHEMERAL=${RUNNER_FEATURE_FLAG_EPHEMERAL} \
|
|
||||||
acceptance/deploy.sh
|
acceptance/deploy.sh
|
||||||
|
|
||||||
acceptance/tests:
|
acceptance/tests:
|
||||||
acceptance/checks.sh
|
acceptance/checks.sh
|
||||||
|
|
||||||
|
acceptance/runner/entrypoint:
|
||||||
|
cd test/entrypoint/ && bash test.sh
|
||||||
|
|
||||||
# We use -count=1 instead of `go clean -testcache`
|
# We use -count=1 instead of `go clean -testcache`
|
||||||
# See https://terratest.gruntwork.io/docs/testing-best-practices/avoid-test-caching/
|
# See https://terratest.gruntwork.io/docs/testing-best-practices/avoid-test-caching/
|
||||||
.PHONY: e2e
|
.PHONY: e2e
|
||||||
@@ -223,7 +222,7 @@ ifeq (, $(wildcard $(GOBIN)/controller-gen))
|
|||||||
CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\
|
CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\
|
||||||
cd $$CONTROLLER_GEN_TMP_DIR ;\
|
cd $$CONTROLLER_GEN_TMP_DIR ;\
|
||||||
go mod init tmp ;\
|
go mod init tmp ;\
|
||||||
go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.7.0 ;\
|
go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.7.0 ;\
|
||||||
rm -rf $$CONTROLLER_GEN_TMP_DIR ;\
|
rm -rf $$CONTROLLER_GEN_TMP_DIR ;\
|
||||||
}
|
}
|
||||||
endif
|
endif
|
||||||
@@ -243,7 +242,7 @@ ifeq (, $(wildcard $(GOBIN)/yq))
|
|||||||
YQ_TMP_DIR=$$(mktemp -d) ;\
|
YQ_TMP_DIR=$$(mktemp -d) ;\
|
||||||
cd $$YQ_TMP_DIR ;\
|
cd $$YQ_TMP_DIR ;\
|
||||||
go mod init tmp ;\
|
go mod init tmp ;\
|
||||||
go get github.com/mikefarah/yq/v3@3.4.0 ;\
|
go install github.com/mikefarah/yq/v3@3.4.0 ;\
|
||||||
rm -rf $$YQ_TMP_DIR ;\
|
rm -rf $$YQ_TMP_DIR ;\
|
||||||
}
|
}
|
||||||
endif
|
endif
|
||||||
|
|||||||
637
README.md
637
README.md
@@ -1,12 +1,15 @@
|
|||||||
# actions-runner-controller
|
# actions-runner-controller (ARC)
|
||||||
|
|
||||||
|
[](https://bestpractices.coreinfrastructure.org/projects/6061)
|
||||||
[](https://github.com/jonico/awesome-runners)
|
[](https://github.com/jonico/awesome-runners)
|
||||||
|
|
||||||
This controller operates self-hosted runners for GitHub Actions on your Kubernetes cluster.
|
This controller operates self-hosted runners for GitHub Actions on your Kubernetes cluster.
|
||||||
|
|
||||||
ToC:
|
ToC:
|
||||||
|
|
||||||
- [Motivation](#motivation)
|
- [People](#people)
|
||||||
|
- [Status](#status)
|
||||||
|
- [About](#about)
|
||||||
- [Installation](#installation)
|
- [Installation](#installation)
|
||||||
- [GitHub Enterprise Support](#github-enterprise-support)
|
- [GitHub Enterprise Support](#github-enterprise-support)
|
||||||
- [Setting Up Authentication with GitHub API](#setting-up-authentication-with-github-api)
|
- [Setting Up Authentication with GitHub API](#setting-up-authentication-with-github-api)
|
||||||
@@ -18,6 +21,8 @@ ToC:
|
|||||||
- [Organization Runners](#organization-runners)
|
- [Organization Runners](#organization-runners)
|
||||||
- [Enterprise Runners](#enterprise-runners)
|
- [Enterprise Runners](#enterprise-runners)
|
||||||
- [RunnerDeployments](#runnerdeployments)
|
- [RunnerDeployments](#runnerdeployments)
|
||||||
|
- [RunnerSets](#runnersets)
|
||||||
|
- [Persistent Runners](#persistent-runners)
|
||||||
- [Autoscaling](#autoscaling)
|
- [Autoscaling](#autoscaling)
|
||||||
- [Anti-Flapping Configuration](#anti-flapping-configuration)
|
- [Anti-Flapping Configuration](#anti-flapping-configuration)
|
||||||
- [Pull Driven Scaling](#pull-driven-scaling)
|
- [Pull Driven Scaling](#pull-driven-scaling)
|
||||||
@@ -26,37 +31,57 @@ ToC:
|
|||||||
- [Scheduled Overrides](#scheduled-overrides)
|
- [Scheduled Overrides](#scheduled-overrides)
|
||||||
- [Runner with DinD](#runner-with-dind)
|
- [Runner with DinD](#runner-with-dind)
|
||||||
- [Additional Tweaks](#additional-tweaks)
|
- [Additional Tweaks](#additional-tweaks)
|
||||||
|
- [Custom Volume mounts](#custom-volume-mounts)
|
||||||
- [Runner Labels](#runner-labels)
|
- [Runner Labels](#runner-labels)
|
||||||
- [Runner Groups](#runner-groups)
|
- [Runner Groups](#runner-groups)
|
||||||
- [Runner Entrypoint Features](#runner-entrypoint-features)
|
- [Runner Entrypoint Features](#runner-entrypoint-features)
|
||||||
- [Using IRSA (IAM Roles for Service Accounts) in EKS](#using-irsa-iam-roles-for-service-accounts-in-eks)
|
- [Using IRSA (IAM Roles for Service Accounts) in EKS](#using-irsa-iam-roles-for-service-accounts-in-eks)
|
||||||
- [Stateful Runners](#stateful-runners)
|
|
||||||
- [Ephemeral Runners](#ephemeral-runners)
|
|
||||||
- [Software Installed in the Runner Image](#software-installed-in-the-runner-image)
|
- [Software Installed in the Runner Image](#software-installed-in-the-runner-image)
|
||||||
- [Using without cert-manager](#using-without-cert-manager)
|
- [Using without cert-manager](#using-without-cert-manager)
|
||||||
- [Common Errors](#common-errors)
|
|
||||||
- [Troubleshooting](#troubleshooting)
|
- [Troubleshooting](#troubleshooting)
|
||||||
- [Contributing](#contributing)
|
- [Contributing](#contributing)
|
||||||
|
|
||||||
## Motivation
|
|
||||||
|
## People
|
||||||
|
|
||||||
|
`actions-runner-controller` is an open-source project currently developed and maintained in collaboration with maintainers @mumoshu and @toast-gear, various [contributors](https://github.com/actions-runner-controller/actions-runner-controller/graphs/contributors), and the [awesome community](https://github.com/actions-runner-controller/actions-runner-controller/discussions), mostly in their spare time.
|
||||||
|
|
||||||
|
If you think the project is awesome and it's becoming a basis for your important business, consider [sponsoring us](https://github.com/sponsors/actions-runner-controller)!
|
||||||
|
|
||||||
|
In case you are already the employer of one of contributors, sponsoring via GitHub Sponsors might not be an option. Just support them in other means!
|
||||||
|
|
||||||
|
We don't currently have [any sponsors dedicated to this project yet](https://github.com/sponsors/actions-runner-controller).
|
||||||
|
|
||||||
|
However, [HelloFresh](https://www.hellofreshgroup.com/en/) has recently started sponsoring @mumoshu for this project along with his other works. A part of their sponsorship will enable @mumoshu to add an E2E test to keep ARC even more reliable on AWS. Thank you for your sponsorship!
|
||||||
|
|
||||||
|
[<img src="https://user-images.githubusercontent.com/22009/170898715-07f02941-35ec-418b-8cd4-251b422fa9ac.png" width="219" height="71" />](https://careers.hellofresh.com/)
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Even though actions-runner-controller is used in production environments, it is still in its early stage of development, hence versioned 0.x.
|
||||||
|
|
||||||
|
actions-runner-controller complies to Semantic Versioning 2.0.0 in which v0.x means that there could be backward-incompatible changes for every release.
|
||||||
|
|
||||||
|
The documentation is kept inline with master@HEAD, we do our best to highlight any features that require a specific ARC version or higher however this is not always easily done due to there being many moving parts. Additionally, we actively do not retain compatibly with every GitHub Enterprise Server version nor every Kubernetes version so you will need to ensure you stay current within a reasonable timespan.
|
||||||
|
|
||||||
|
## About
|
||||||
|
|
||||||
[GitHub Actions](https://github.com/features/actions) is a very useful tool for automating development. GitHub Actions jobs are run in the cloud by default, but you may want to run your jobs in your environment. [Self-hosted runner](https://github.com/actions/runner) can be used for such use cases, but requires the provisioning and configuration of a virtual machine instance. Instead if you already have a Kubernetes cluster, it makes more sense to run the self-hosted runner on top of it.
|
[GitHub Actions](https://github.com/features/actions) is a very useful tool for automating development. GitHub Actions jobs are run in the cloud by default, but you may want to run your jobs in your environment. [Self-hosted runner](https://github.com/actions/runner) can be used for such use cases, but requires the provisioning and configuration of a virtual machine instance. Instead if you already have a Kubernetes cluster, it makes more sense to run the self-hosted runner on top of it.
|
||||||
|
|
||||||
**actions-runner-controller** makes that possible. Just create a *Runner* resource on your Kubernetes, and it will run and operate the self-hosted runner for the specified repository. Combined with Kubernetes RBAC, you can also build simple Self-hosted runners as a Service.
|
**actions-runner-controller** makes that possible. Just create a *Runner* resource on your Kubernetes, and it will run and operate the self-hosted runner for the specified repository. Combined with Kubernetes RBAC, you can also build simple Self-hosted runners as a Service.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
By default, actions-runner-controller uses [cert-manager](https://cert-manager.io/docs/installation/kubernetes/) for certificate management of Admission Webhook. Make sure you have already installed cert-manager before you install. The installation instructions for cert-manager can be found below.
|
By default, actions-runner-controller uses [cert-manager](https://cert-manager.io/docs/installation/kubernetes/) for certificate management of Admission Webhook. Make sure you have already installed cert-manager before you install. The installation instructions for the cert-manager can be found below.
|
||||||
|
|
||||||
- [Installing cert-manager on Kubernetes](https://cert-manager.io/docs/installation/kubernetes/)
|
- [Installing cert-manager on Kubernetes](https://cert-manager.io/docs/installation/kubernetes/)
|
||||||
|
|
||||||
Subsequent to this, install the custom resource definitions and actions-runner-controller with `kubectl` or `helm`. This will create actions-runner-system namespace in your Kubernetes and deploy the required resources.
|
After installing cert-manager, install the custom resource definitions and actions-runner-controller with `kubectl` or `helm`. This will create an actions-runner-system namespace in your Kubernetes and deploy the required resources.
|
||||||
|
|
||||||
**Kubectl Deployment:**
|
**Kubectl Deployment:**
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
# REPLACE "v0.20.2" with the version you wish to deploy
|
# REPLACE "v0.22.0" with the version you wish to deploy
|
||||||
kubectl apply -f https://github.com/actions-runner-controller/actions-runner-controller/releases/download/v0.20.2/actions-runner-controller.yaml
|
kubectl apply -f https://github.com/actions-runner-controller/actions-runner-controller/releases/download/v0.22.0/actions-runner-controller.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
**Helm Deployment:**
|
**Helm Deployment:**
|
||||||
@@ -81,7 +106,7 @@ When deploying the solution for a GHES environment you need to provide an additi
|
|||||||
kubectl set env deploy controller-manager -c manager GITHUB_ENTERPRISE_URL=<GHEC/S URL> --namespace actions-runner-system
|
kubectl set env deploy controller-manager -c manager GITHUB_ENTERPRISE_URL=<GHEC/S URL> --namespace actions-runner-system
|
||||||
```
|
```
|
||||||
|
|
||||||
**_Note: The repository maintainers do not have an enterprise environment (cloud or server). Support for the enterprise specific feature set is community driven and on a best effort basis. PRs from the community are welcomed to add features and maintain support._**
|
**_Note: The repository maintainers do not have an enterprise environment (cloud or server). Support for the enterprise specific feature set is community driven and on a best effort basis. PRs from the community are welcome to add features and maintain support._**
|
||||||
|
|
||||||
## Setting Up Authentication with GitHub API
|
## Setting Up Authentication with GitHub API
|
||||||
|
|
||||||
@@ -90,7 +115,7 @@ There are two ways for actions-runner-controller to authenticate with the GitHub
|
|||||||
1. Using a GitHub App (not supported for enterprise level runners due to lack of support from GitHub)
|
1. Using a GitHub App (not supported for enterprise level runners due to lack of support from GitHub)
|
||||||
2. Using a PAT
|
2. Using a PAT
|
||||||
|
|
||||||
Functionality wise, there isn't much of a difference between the 2 authentication methods. The primarily benefit of authenticating via a GitHub App is an [increased API quota](https://docs.github.com/en/developers/apps/rate-limits-for-github-apps).
|
Functionality wise, there isn't much of a difference between the 2 authentication methods. The primary benefit of authenticating via a GitHub App is an [increased API quota](https://docs.github.com/en/developers/apps/rate-limits-for-github-apps).
|
||||||
|
|
||||||
If you are deploying the solution for a GHES environment you are able to [configure your rate limit settings](https://docs.github.com/en/enterprise-server@3.0/admin/configuration/configuring-rate-limits) making the main benefit irrelevant. If you're deploying the solution for a GHEC or regular GitHub environment and you run into rate limit issues, consider deploying the solution using the GitHub App authentication method instead.
|
If you are deploying the solution for a GHES environment you are able to [configure your rate limit settings](https://docs.github.com/en/enterprise-server@3.0/admin/configuration/configuring-rate-limits) making the main benefit irrelevant. If you're deploying the solution for a GHEC or regular GitHub environment and you run into rate limit issues, consider deploying the solution using the GitHub App authentication method instead.
|
||||||
|
|
||||||
@@ -156,7 +181,7 @@ When the installation is complete, you will be taken to a URL in one of the foll
|
|||||||
- `https://github.com/organizations/eventreactor/settings/installations/${INSTALLATION_ID}`
|
- `https://github.com/organizations/eventreactor/settings/installations/${INSTALLATION_ID}`
|
||||||
|
|
||||||
|
|
||||||
Finally, register the App ID (`APP_ID`), Installation ID (`INSTALLATION_ID`), and downloaded private key file (`PRIVATE_KEY_FILE_PATH`) to Kubernetes as Secret.
|
Finally, register the App ID (`APP_ID`), Installation ID (`INSTALLATION_ID`), and the downloaded private key file (`PRIVATE_KEY_FILE_PATH`) to Kubernetes as a secret.
|
||||||
|
|
||||||
**Kubectl Deployment:**
|
**Kubectl Deployment:**
|
||||||
|
|
||||||
@@ -196,9 +221,9 @@ Log-in to a GitHub account that has `admin` privileges for the repository, and [
|
|||||||
|
|
||||||
* admin:enterprise (manage_runners:enterprise)
|
* admin:enterprise (manage_runners:enterprise)
|
||||||
|
|
||||||
_Note: When you deploy enterprise runners they will get access to organizations, however, access to the repositories themselves is **NOT** allowed by default. Each GitHub organization must allow enterprise runner groups to be used in repositories as an initial one time configuration step, this only needs to be done once after which it is permanent for that runner group._
|
_Note: When you deploy enterprise runners they will get access to organizations, however, access to the repositories themselves is **NOT** allowed by default. Each GitHub organization must allow enterprise runner groups to be used in repositories as an initial one-time configuration step, this only needs to be done once after which it is permanent for that runner group._
|
||||||
|
|
||||||
_Note: GitHub do not document exactly what permissions you get with each PAT scope beyond a vague description. The best documentation they provide on the topic can be found [here](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps) if you wish to review. The docs target OAuth apps and so are incomplete and amy not be 100% accurate._
|
_Note: GitHub does not document exactly what permissions you get with each PAT scope beyond a vague description. The best documentation they provide on the topic can be found [here](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps) if you wish to review. The docs target OAuth apps and so are incomplete and may not be 100% accurate._
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -220,20 +245,22 @@ Configure your values.yaml, see the chart's [README](./charts/actions-runner-con
|
|||||||
|
|
||||||
> This feature requires controller version => [v0.18.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.18.0)
|
> This feature requires controller version => [v0.18.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.18.0)
|
||||||
|
|
||||||
**_Note: Be aware when using this feature that CRDs are cluster wide and so you should upgrade all of your controllers (and your CRDs) as the same time if you are doing an upgrade. Do not mix and match CRD versions with different controller versions. Doing so risks out of control scaling._**
|
**_Note: Be aware when using this feature that CRDs are cluster-wide and so you should upgrade all of your controllers (and your CRDs) at the same time if you are doing an upgrade. Do not mix and match CRD versions with different controller versions. Doing so risks out of control scaling._**
|
||||||
|
|
||||||
By default the controller will look for runners in all namespaces, the watch namespace feature allows you to restrict the controller to monitoring a single namespace. This then lets you deploy multiple controllers in a single cluster. You may want to do this either because you wish to scale beyond the API rate limit of a single PAT / GitHub App configuration or you wish to support multiple GitHub organizations with runners installed at the organization level in a single cluster.
|
By default the controller will look for runners in all namespaces, the watch namespace feature allows you to restrict the controller to monitoring a single namespace. This then lets you deploy multiple controllers in a single cluster. You may want to do this either because you wish to scale beyond the API rate limit of a single PAT / GitHub App configuration or you wish to support multiple GitHub organizations with runners installed at the organization level in a single cluster.
|
||||||
|
|
||||||
This feature is configured via the controller's `--watch-namespace` flag. When a namespace is provided via this flag, the controller will only monitor runners in that namespace.
|
This feature is configured via the controller's `--watch-namespace` flag. When a namespace is provided via this flag, the controller will only monitor runners in that namespace.
|
||||||
|
|
||||||
If you plan on installing all instances of the controller stack into a single namespace you will need to make the names of the resources unique to each stack. In the case of Helm this can be done by giving each install a unique release name, or via the `fullnameOverride` properties.
|
You can deploy multiple controllers either in a single shared namespace, or in a unique namespace per controller.
|
||||||
|
|
||||||
Alternatively, you can install each controller stack into its own unique namespace (relative to other controller stacks in the cluster), avoiding the need to uniquely prefix resources.
|
If you plan on installing all instances of the controller stack into a single namespace there are a few things you need to do for this to work.
|
||||||
|
|
||||||
When you go to the route of sharing the namespace while giving each a unique Helm release name, you must also ensure the following values are configured correctly:
|
1. All resources per stack must have a unique name; in the case of Helm this can be done by giving each install a unique release name, or via the `fullnameOverride` properties.
|
||||||
|
2. `authSecret.name` needs to be unique per stack when each stack is tied to runners in different GitHub organizations and repositories AND you want your GitHub credentials to be narrowly scoped.
|
||||||
|
3. `leaderElectionId` needs to be unique per stack. If this is not unique to the stack the controller tries to race onto the leader election lock resulting in only one stack working concurrently. Your controller will be stuck with a log message something like this `attempting to acquire leader lease arc-controllers/actions-runner-controller...`
|
||||||
|
4. The MutatingWebhookConfiguration in each stack must include a namespace selector for that stack's corresponding runner namespace, this is already configured in the helm chart.
|
||||||
|
|
||||||
- `authSecret.name` needs be unique per stack when each stack is tied to runners in different GitHub organizations and repositories AND you want your GitHub credentials to narrowly scoped.
|
Alternatively, you can install each controller stack into a unique namespace (relative to other controller stacks in the cluster). Implementing ARC this way avoids the first, second and third pitfalls (you still need to set the corresponding namespace selector for each stack's mutating webhook)
|
||||||
- `leaderElectionId` needs to be unique per stack. If this is not unique to the stack the controller tries to race onto the leader election lock and resulting in only one stack working concurrently.
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
@@ -249,7 +276,7 @@ There are two ways to use this controller:
|
|||||||
|
|
||||||
### Repository Runners
|
### Repository Runners
|
||||||
|
|
||||||
To launch a single self-hosted runner, you need to create a manifest file includes `Runner` resource as follows. This example launches a self-hosted runner with name *example-runner* for the *actions-runner-controller/actions-runner-controller* repository.
|
To launch a single self-hosted runner, you need to create a manifest file that includes a `Runner` resource as follows. This example launches a self-hosted runner with name *example-runner* for the *actions-runner-controller/actions-runner-controller* repository.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# runner.yaml
|
# runner.yaml
|
||||||
@@ -361,21 +388,143 @@ example-runnerdeploy2475h595fr mumoshu/actions-runner-controller-ci Running
|
|||||||
example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running
|
example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### RunnerSets
|
||||||
|
|
||||||
|
> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
|
||||||
|
|
||||||
|
_Ensure you see the limitations before using this kind!!!!!_
|
||||||
|
|
||||||
|
For scenarios where you require the advantages of a `StatefulSet`, for example persistent storage, ARC implements a runner based on Kubernetes' `StatefulSets`, the `RunnerSet`.
|
||||||
|
|
||||||
|
A basic `RunnerSet` would look like this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
ephemeral: false
|
||||||
|
replicas: 2
|
||||||
|
repository: mumoshu/actions-runner-controller-ci
|
||||||
|
# Other mandatory fields from StatefulSet
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: example
|
||||||
|
serviceName: example
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: example
|
||||||
|
```
|
||||||
|
|
||||||
|
As it is based on `StatefulSet`, `selector` and `template.metadata.labels` need to be defined and have the exact same set of labels. `serviceName` must be set to some non-empty string as it is also required by `StatefulSet`.
|
||||||
|
|
||||||
|
Runner-related fields like `ephemeral`, `repository`, `organization`, `enterprise`, and so on should be written directly under `spec`.
|
||||||
|
|
||||||
|
Fields like `volumeClaimTemplates` that originate from `StatefulSet` should also be written directly under `spec`.
|
||||||
|
|
||||||
|
Pod-related fields like security contexts and volumes are written under `spec.template.spec` like `StatefulSet`.
|
||||||
|
|
||||||
|
Similarly, container-related fields like resource requests and limits, container image names and tags, security context, and so on are written under `spec.template.spec.containers`. There are two reserved container names, `runner` and `docker`. The former is for the container that runs [actions runner](https://github.com/actions/runner) and the latter is for the container that runs a `dockerd`.
|
||||||
|
|
||||||
|
For a more complex example, see the below:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
ephemeral: false
|
||||||
|
replicas: 2
|
||||||
|
repository: mumoshu/actions-runner-controller-ci
|
||||||
|
dockerdWithinRunnerContainer: true
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
# All level/role/type/user values will vary based on your SELinux policies.
|
||||||
|
# See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_atomic_host/7/html/container_security_guide/docker_selinux_security_policy for information about SELinux with containers
|
||||||
|
seLinuxOptions:
|
||||||
|
level: "s0"
|
||||||
|
role: "system_r"
|
||||||
|
type: "super_t"
|
||||||
|
user: "system_u"
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
env: []
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "4.0"
|
||||||
|
memory: "8Gi"
|
||||||
|
requests:
|
||||||
|
cpu: "2.0"
|
||||||
|
memory: "4Gi"
|
||||||
|
# This is an advanced configuration. Don't touch it unless you know what you're doing.
|
||||||
|
securityContext:
|
||||||
|
# Usually, the runner container's privileged field is derived from dockerdWithinRunnerContainer.
|
||||||
|
# But in the case where you need to run privileged job steps even if you don't use docker/don't need dockerd within the runner container,
|
||||||
|
# just specified `privileged: true` like this.
|
||||||
|
# See https://github.com/actions-runner-controller/actions-runner-controller/issues/1282
|
||||||
|
# Do note that specifying `privileged: false` while using dind is very likely to fail, even if you use some vm-based container runtimes
|
||||||
|
# like firecracker and kata. Basically they run containers within dedicated micro vms and so
|
||||||
|
# it's more like you can use `privileged: true` safer with those runtimes.
|
||||||
|
#
|
||||||
|
# privileged: true
|
||||||
|
- name: docker
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "4.0"
|
||||||
|
memory: "8Gi"
|
||||||
|
requests:
|
||||||
|
cpu: "2.0"
|
||||||
|
memory: "4Gi"
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also read the design and usage documentation written in the original pull request that introduced `RunnerSet` for more information [#629](https://github.com/actions-runner-controller/actions-runner-controller/pull/629).
|
||||||
|
|
||||||
|
Under the hood, `RunnerSet` relies on Kubernetes's `StatefulSet` and Mutating Webhook. A `statefulset` is used to create a number of pods that have stable names and dynamically provisioned persistent volumes, so that each `statefulset-managed` pod gets the same persistent volume even after restarting. A mutating webhook is used to dynamically inject a runner's "registration token" which is used to call GitHub's "Create Runner" API.
|
||||||
|
|
||||||
|
**Limitations**
|
||||||
|
|
||||||
|
* For autoscaling the `RunnerSet` kind only supports pull driven scaling or the `workflow_job` event for webhook driven scaling.
|
||||||
|
|
||||||
|
### Persistent Runners
|
||||||
|
|
||||||
|
Every runner managed by ARC is "ephemeral" by default. The life of an ephemeral runner managed by ARC looks like this: ARC creates a runner pod for the runner. As it's an ephemeral runner, the `--ephemeral` flag is passed to the `actions/runner` agent that runs within the `runner` container of the runner pod.
|
||||||
|
|
||||||
|
`--ephemeral` is an `actions/runner` feature that instructs the runner to stop and de-register itself after the first job run.
|
||||||
|
|
||||||
|
Once the ephemeral runner has completed running a workflow job, it stops with a status code of 0, hence the runner pod is marked as completed and removed by ARC.
|
||||||
|
|
||||||
|
As it's removed after a workflow job run, the runner pod is never reused across multiple GitHub Actions workflow jobs, providing you a clean environment per each workflow job.
|
||||||
|
|
||||||
|
Although not generally recommended, it's possible to disable the passing of the `--ephemeral` flag by explicitly setting `ephemeral: false` in the `RunnerDeployment` or `RunnerSet` spec. When disabled, your runner becomes "persistent". A persistent runner does not stop after a workflow job ends, and in this mode `actions/runner` is known to clean only the runner's work dir after each job. Whilst this can seem helpful it creates a non-deterministic environment which is not ideal for a CI/CD environment. Between runs, your actions cache, docker images stored in the `dind` and layer cache, globally installed packages etc are retained across multiple workflow job runs which can cause issues that are hard to debug and inconsistent.
|
||||||
|
|
||||||
|
Persistent runners are available as an option for some edge cases however they are not preferred as they can create challenges around providing a deterministic and secure environment.
|
||||||
|
|
||||||
### Autoscaling
|
### Autoscaling
|
||||||
|
|
||||||
> Since the release of GitHub's [`workflow_job` webhook](https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job), webhook driven scaling is the preferred way of autoscaling as it enables targeted scaling of your `RunnerDeployment` / `RunnerSet` as it includes the `runs-on` information needed to scale the appropriate runners for that workflow run. More broadly, webhook driven scaling is the preferred scaling option as it is far quicker compared to the pull driven scaling and is easy to setup.
|
> Since the release of GitHub's [`workflow_job` webhook](https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job), webhook driven scaling is the preferred way of autoscaling as it enables targeted scaling of your `RunnerDeployment` / `RunnerSet` as it includes the `runs-on` information needed to scale the appropriate runners for that workflow run. More broadly, webhook driven scaling is the preferred scaling option as it is far quicker compared to the pull driven scaling and is easy to set up.
|
||||||
|
|
||||||
A `RunnerDeployment` or `RunnerSet` (see [stateful runners](#stateful-runners) for more details on this kind) can scale the number of runners between `minReplicas` and `maxReplicas` fields driven by either pull based scaling metrics or via a webhook event (see limitations section of [stateful runners](#stateful-runners) for cavaets of this kind). Whether the autoscaling is driven from a webhook event or pull based metrics it is implemented by backing a `RunnerDeployment` or `RunnerSet` kind with a `HorizontalRunnerAutoscaler` kind.
|
> If you are using controller version < [v0.22.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.22.0) and you are not using GHES, and so can't set your rate limit budget, it is recommended that you use 100 replicas or fewer to prevent being rate limited.
|
||||||
|
|
||||||
|
A `RunnerDeployment` or `RunnerSet` can scale the number of runners between `minReplicas` and `maxReplicas` fields driven by either pull based scaling metrics or via a webhook event (see limitations section of [RunnerSets](#runnersets) for caveats of this kind). Whether the autoscaling is driven from a webhook event or pull based metrics it is implemented by backing a `RunnerDeployment` or `RunnerSet` kind with a `HorizontalRunnerAutoscaler` kind.
|
||||||
|
|
||||||
**_Important!!! If you opt to configure autoscaling, ensure you remove the `replicas:` attribute in the `RunnerDeployment` / `RunnerSet` kinds that are configured for autoscaling [#206](https://github.com/actions-runner-controller/actions-runner-controller/issues/206#issuecomment-748601907)_**
|
**_Important!!! If you opt to configure autoscaling, ensure you remove the `replicas:` attribute in the `RunnerDeployment` / `RunnerSet` kinds that are configured for autoscaling [#206](https://github.com/actions-runner-controller/actions-runner-controller/issues/206#issuecomment-748601907)_**
|
||||||
|
|
||||||
#### Anti-Flapping Configuration
|
#### Anti-Flapping Configuration
|
||||||
|
|
||||||
For both pull driven or webhook driven scaling an anti-flapping implementation is included, by default a runner won't be scaled down within 10 minutes of it having been scaled up. This delay is configurable by including the attribute `scaleDownDelaySecondsAfterScaleOut:` in a `HorizontalRunnerAutoscaler` kind's `spec:`.
|
For both pull driven or webhook driven scaling an anti-flapping implementation is included, by default a runner won't be scaled down within 10 minutes of it having been scaled up.
|
||||||
|
|
||||||
This configuration has the final say on if a runner can be scaled down or not regardless of the chosen scaling method. Depending on your requirements, you may want to consider adjusting this by setting the `scaleDownDelaySecondsAfterScaleOut:` attribute.
|
This anti-flap configuration also has the final say on if a runner can be scaled down or not regardless of the chosen scaling method.
|
||||||
|
|
||||||
Below is a complete basic example with one of the pull driven scaling metrics.
|
This delay is configurable via 2 methods:
|
||||||
|
|
||||||
|
1. By setting a new default via the controller's `--default-scale-down-delay` flag
|
||||||
|
2. By setting the attribute `scaleDownDelaySecondsAfterScaleOut:` in a `HorizontalRunnerAutoscaler` kind's `spec:`.
|
||||||
|
|
||||||
|
Below is a complete basic example of one of the pull driven scaling metrics.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
@@ -413,7 +562,9 @@ spec:
|
|||||||
|
|
||||||
> To configure webhook driven scaling see the [Webhook Driven Scaling](#webhook-driven-scaling) section
|
> To configure webhook driven scaling see the [Webhook Driven Scaling](#webhook-driven-scaling) section
|
||||||
|
|
||||||
The pull based metrics are configured in the `metrics` attribute of a HRA (see snippet below). The period between polls is defined by the controller's `--sync-period` flag. If this flag isn't provided then the controller defaults to a sync period of 10 minutes. The default value is set to 10 minutes to prevent default deployments rate limiting themselves from the GitHub API, you will most likely want to adjust this.
|
The pull based metrics are configured in the `metrics` attribute of a HRA (see snippet below). The period between polls is defined by the controller's `--sync-period` flag. If this flag isn't provided then the controller defaults to a sync period of `1m`, this can be configured in seconds or minutes.
|
||||||
|
|
||||||
|
Be aware that the shorter the sync period the quicker you will consume your rate limit budget, depending on your environment this may or may not be a risk. Consider monitoring ARC's rate limit budget when configuring this feature to find the optimal performance sync period.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
@@ -440,14 +591,13 @@ The `TotalNumberOfQueuedAndInProgressWorkflowRuns` metric polls GitHub for all p
|
|||||||
|
|
||||||
**Benefits of this metric**
|
**Benefits of this metric**
|
||||||
1. Supports named repositories allowing you to restrict the runner to a specified set of repositories server-side.
|
1. Supports named repositories allowing you to restrict the runner to a specified set of repositories server-side.
|
||||||
2. Scales the runner count based on the depth of the job queue meaning a more 1:1 scaling of runners to queued jobs (caveat, see drawback #4)
|
2. Scales the runner count based on the depth of the job queue meaning a 1:1 scaling of runners to queued jobs.
|
||||||
3. Like all scaling metrics, you can manage workflow allocation to the RunnerDeployment through the use of [GitHub labels](#runner-labels).
|
3. Like all scaling metrics, you can manage workflow allocation to the RunnerDeployment through the use of [GitHub labels](#runner-labels).
|
||||||
|
|
||||||
**Drawbacks of this metric**
|
**Drawbacks of this metric**
|
||||||
1. A list of repositories must be included within the scaling metric. Maintaining a list of repositories may not be viable in larger environments or self-serve environments.
|
1. A list of repositories must be included within the scaling metric. Maintaining a list of repositories may not be viable in larger environments or self-serve environments.
|
||||||
2. May not scale quick enough for some users needs. This metric is pull based and so the queue depth is polled as configured by the sync period, as a result scaling performance is bound by this sync period meaning there is a lag to scaling activity.
|
2. May not scale quickly enough for some users' needs. This metric is pull based and so the queue depth is polled as configured by the sync period, as a result scaling performance is bound by this sync period meaning there is a lag to scaling activity.
|
||||||
3. Relatively large amounts of API requests required to maintain this metric, you may run in API rate limit issues depending on the size of your environment and how aggressive your sync period configuration is.
|
3. Relatively large amounts of API requests are required to maintain this metric, you may run into API rate limit issues depending on the size of your environment and how aggressive your sync period configuration is.
|
||||||
4. The GitHub API doesn't provide a way to filter workflow jobs to just those targeting self-hosted runners. If your environment's workflows target both self-hosted and GitHub hosted runners then the queue depth this metric scales against isn't a true 1:1 mapping of queue depth to required runner count. As a result of this, this metric may scale too aggressively for your actual self-hosted runner count needs.
|
|
||||||
|
|
||||||
Example `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
|
Example `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
|
||||||
|
|
||||||
@@ -468,7 +618,7 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
scaleTargetRef:
|
scaleTargetRef:
|
||||||
name: example-runner-deployment
|
name: example-runner-deployment
|
||||||
# Uncomment the below in case the target is not RunnerDeployment but RunnerSet
|
# IMPORTANT : If your HRA is targeting a RunnerSet you must specify the kind in the scaleTargetRef:, uncomment the below
|
||||||
#kind: RunnerSet
|
#kind: RunnerSet
|
||||||
minReplicas: 1
|
minReplicas: 1
|
||||||
maxReplicas: 5
|
maxReplicas: 5
|
||||||
@@ -489,7 +639,7 @@ The `HorizontalRunnerAutoscaler` will poll GitHub for the number of runners in t
|
|||||||
4. Supports scaling desired runner count on both a percentage increase / decrease basis as well as on a fixed increase / decrease count basis [#223](https://github.com/actions-runner-controller/actions-runner-controller/pull/223) [#315](https://github.com/actions-runner-controller/actions-runner-controller/pull/315)
|
4. Supports scaling desired runner count on both a percentage increase / decrease basis as well as on a fixed increase / decrease count basis [#223](https://github.com/actions-runner-controller/actions-runner-controller/pull/223) [#315](https://github.com/actions-runner-controller/actions-runner-controller/pull/315)
|
||||||
|
|
||||||
**Drawbacks of this metric**
|
**Drawbacks of this metric**
|
||||||
1. May not scale quick enough for some users needs. This metric is pull based and so the number of busy runners are polled as configured by the sync period, as a result scaling performance is bound by this sync period meaning there is a lag to scaling activity.
|
1. May not scale quickly enough for some users' needs. This metric is pull based and so the number of busy runners is polled as configured by the sync period, as a result scaling performance is bound by this sync period meaning there is a lag to scaling activity.
|
||||||
2. We are scaling up and down based on indicative information rather than a count of the actual number of queued jobs and so the desired runner count is likely to under provision new runners or overprovision them relative to actual job queue depth, this may or may not be a problem for you.
|
2. We are scaling up and down based on indicative information rather than a count of the actual number of queued jobs and so the desired runner count is likely to under provision new runners or overprovision them relative to actual job queue depth, this may or may not be a problem for you.
|
||||||
|
|
||||||
Examples of each scaling type implemented with a `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
|
Examples of each scaling type implemented with a `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
|
||||||
@@ -540,10 +690,10 @@ spec:
|
|||||||
|
|
||||||
> To configure pull driven scaling see the [Pull Driven Scaling](#pull-driven-scaling) section
|
> To configure pull driven scaling see the [Pull Driven Scaling](#pull-driven-scaling) section
|
||||||
|
|
||||||
Webhooks are processed by a seperate webhook server. The webhook server receives GitHub Webhook events and scales
|
Webhooks are processed by a separate webhook server. The webhook server receives GitHub Webhook events and scales
|
||||||
[`RunnerDeployments`](#runnerdeployments) by updating corresponding [`HorizontalRunnerAutoscalers`](#autoscaling).
|
[`RunnerDeployments`](#runnerdeployments) by updating corresponding [`HorizontalRunnerAutoscalers`](#autoscaling).
|
||||||
|
|
||||||
Today, the Webhook server can be configured to respond GitHub `check_run`, `workflow_job`, `pull_request` and `push` events
|
Today, the Webhook server can be configured to respond to GitHub's `check_run`, `workflow_job`, `pull_request`, and `push` events
|
||||||
by scaling up the matching `HorizontalRunnerAutoscaler` by N replica(s), where `N` is configurable within `HorizontalRunnerAutoscaler`'s `spec:`.
|
by scaling up the matching `HorizontalRunnerAutoscaler` by N replica(s), where `N` is configurable within `HorizontalRunnerAutoscaler`'s `spec:`.
|
||||||
|
|
||||||
More concretely, you can configure the targeted GitHub event types and the `N` in `scaleUpTriggers`:
|
More concretely, you can configure the targeted GitHub event types and the `N` in `scaleUpTriggers`:
|
||||||
@@ -566,27 +716,27 @@ spec:
|
|||||||
|
|
||||||
With the above example, the webhook server scales `example-runners` by `1` replica for 5 minutes on each `check_run` event with the type of `created` and the status of `queued` received.
|
With the above example, the webhook server scales `example-runners` by `1` replica for 5 minutes on each `check_run` event with the type of `created` and the status of `queued` received.
|
||||||
|
|
||||||
Of note is the `HRA.spec.scaleUpTriggers[].duration` attribute. This attribute is used to calculate if the replica number added via the trigger is expired or not. On each reconcilation loop, the controller sums up all the non-expiring replica numbers from previous scale up triggers. It then compares the summed desired replica number against the current replica number. If the summed desired replica number > the current number then it means the replica count needs to scale up.
|
Of note is the `HRA.spec.scaleUpTriggers[].duration` attribute. This attribute is used to calculate if the replica number added via the trigger is expired or not. On each reconciliation loop, the controller sums up all the non-expiring replica numbers from previous scale-up triggers. It then compares the summed desired replica number against the current replica number. If the summed desired replica number > the current number then it means the replica count needs to scale up.
|
||||||
|
|
||||||
As mentioned previously, the `scaleDownDelaySecondsAfterScaleOut` property has the final say still. If the latest scale-up time + the anti-flapping duration is later than the current time, it doesn’t immediately scale up and instead retries the calculation again later to see if it needs to scale yet.
|
As mentioned previously, the `scaleDownDelaySecondsAfterScaleOut` property has the final say still. If the latest scale-up time + the anti-flapping duration is later than the current time, it doesn’t immediately scale up and instead retries the calculation again later to see if it needs to scale yet.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
The primary benefit of autoscaling on Webhook compared to the pull driven scaling is that it is far quicker as it allows you to immediately add runners resource rather than waiting for the next sync period.
|
The primary benefit of autoscaling on Webhooks compared to the pull driven scaling is that it is far quicker as it allows you to immediately add runner resources rather than waiting for the next sync period.
|
||||||
|
|
||||||
> You can learn the implementation details in [#282](https://github.com/actions-runner-controller/actions-runner-controller/pull/282)
|
> You can learn the implementation details in [#282](https://github.com/actions-runner-controller/actions-runner-controller/pull/282)
|
||||||
|
|
||||||
To enable this feature, you firstly need to install the webhook server, currently, only our Helm chart has the ability install it:
|
To enable this feature, you first need to install the GitHub webhook server. To install via our Helm chart,
|
||||||
_[see the values documentation for all configuration options](https://github.com/actions-runner-controller/actions-runner-controller/blob/master/charts/actions-runner-controller/README.md)_
|
_[see the values documentation for all configuration options](https://github.com/actions-runner-controller/actions-runner-controller/blob/master/charts/actions-runner-controller/README.md)_
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ helm --upgrade install actions-runner-controller/actions-runner-controller \
|
$ helm upgrade --install --namespace actions-runner-system --create-namespace \
|
||||||
githubWebhookServer.enabled=true \
|
--wait actions-runner-controller actions-runner-controller/actions-runner-controller \
|
||||||
githubWebhookServer.ports[0].nodePort=33080
|
--set "githubWebhookServer.enabled=true,githubWebhookServer.ports[0].nodePort=33080"
|
||||||
```
|
```
|
||||||
|
|
||||||
The above command will result in exposing the node port 33080 for Webhook events. Usually, you need to create an
|
The above command will result in exposing the node port 33080 for Webhook events. Usually, you need to create an
|
||||||
external loadbalancer targeted to the node port, and register the hostname or the IP address of the external loadbalancer
|
external load balancer targeted to the node port, and register the hostname or the IP address of the external load balancer
|
||||||
to the GitHub Webhook.
|
to the GitHub Webhook.
|
||||||
|
|
||||||
Once you were able to confirm that the Webhook server is ready and running from GitHub - this is usually verified by the
|
Once you were able to confirm that the Webhook server is ready and running from GitHub - this is usually verified by the
|
||||||
@@ -598,13 +748,13 @@ by learning the following configuration examples.
|
|||||||
- [Example 3: Scale on each `pull_request` event against a given set of branches](#example-3-scale-on-each-pull_request-event-against-a-given-set-of-branches)
|
- [Example 3: Scale on each `pull_request` event against a given set of branches](#example-3-scale-on-each-pull_request-event-against-a-given-set-of-branches)
|
||||||
- [Example 4: Scale on each `push` event](#example-4-scale-on-each-push-event)
|
- [Example 4: Scale on each `push` event](#example-4-scale-on-each-push-event)
|
||||||
|
|
||||||
**Note:** All these examples should have **minReplicas** & **maxReplicas** as mandatory parameter even for webhook driven scaling.
|
**Note:** All these examples should have **minReplicas** & **maxReplicas** as mandatory parameters even for webhook driven scaling.
|
||||||
|
|
||||||
##### Example 1: Scale on each `workflow_job` event
|
##### Example 1: Scale on each `workflow_job` event
|
||||||
|
|
||||||
> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
|
> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
|
||||||
|
|
||||||
_Note: GitHub does not include the runner group information of a repository in the payload of `workflow_job` event in the initial `queued` event. The runner group information is only include for `workflow_job` events when the job has already been allocated to a runner (events with a status of `in_progress` or `completed`). Please do raise feature requests against [GitHub](https://support.github.com/tickets/personal/0) for this information to be included in the initial `queued` event if this would improve autoscaling runners for you._
|
_Note: GitHub does not include the runner group information of a repository in the payload of `workflow_job` event in the initial `queued` event. The runner group information is only included for `workflow_job` events when the job has already been allocated to a runner (events with a status of `in_progress` or `completed`). Please do raise feature requests against [GitHub](https://support.github.com/tickets/personal/0) for this information to be included in the initial `queued` event if this would improve autoscaling runners for you._
|
||||||
|
|
||||||
The most flexible webhook GitHub offers is the `workflow_job` webhook, it includes the `runs-on` information in the payload allowing scaling based on runner labels.
|
The most flexible webhook GitHub offers is the `workflow_job` webhook, it includes the `runs-on` information in the payload allowing scaling based on runner labels.
|
||||||
|
|
||||||
@@ -626,7 +776,8 @@ spec:
|
|||||||
# Uncomment the below in case the target is not RunnerDeployment but RunnerSet
|
# Uncomment the below in case the target is not RunnerDeployment but RunnerSet
|
||||||
#kind: RunnerSet
|
#kind: RunnerSet
|
||||||
scaleUpTriggers:
|
scaleUpTriggers:
|
||||||
- githubEvent: {}
|
- githubEvent:
|
||||||
|
workflowJob: {}
|
||||||
duration: "30m"
|
duration: "30m"
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -634,7 +785,7 @@ This webhook requires you to explicitly set the labels in the RunnerDeployment /
|
|||||||
|
|
||||||
You can configure your GitHub webhook settings to only include `Workflows Job` events, so that it sends us three kinds of `workflow_job` events per a job run.
|
You can configure your GitHub webhook settings to only include `Workflows Job` events, so that it sends us three kinds of `workflow_job` events per a job run.
|
||||||
|
|
||||||
Each kind has a `status` of `queued`, `in_progress` and `completed`. With the above configuration, `actions-runner-controller` adds one runner for a `workflow_job` event whose `status` is `queued`. Similarly, it removes one runner for a `workflow_job` event whose `status` is `completed`. The cavaet to this to remember is that this the scale down is within the bounds of your `scaleDownDelaySecondsAfterScaleOut` configuration, if this time hasn't past the scale down will be defered.
|
Each kind has a `status` of `queued`, `in_progress` and `completed`. With the above configuration, `actions-runner-controller` adds one runner for a `workflow_job` event whose `status` is `queued`. Similarly, it removes one runner for a `workflow_job` event whose `status` is `completed`. The caveat to this to remember is that this scale-down is within the bounds of your `scaleDownDelaySecondsAfterScaleOut` configuration, if this time hasn't passed the scale down will be deferred.
|
||||||
|
|
||||||
##### Example 2: Scale up on each `check_run` event
|
##### Example 2: Scale up on each `check_run` event
|
||||||
|
|
||||||
@@ -752,7 +903,7 @@ spec:
|
|||||||
|
|
||||||
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
||||||
|
|
||||||
The regular `RunnerDeployment` `replicas:` attribute as well as the `HorizontalRunnerAutoscaler` `minReplicas:` attribute supports being set to 0.
|
The regular `RunnerDeployment` / `RunnerSet` `replicas:` attribute as well as the `HorizontalRunnerAutoscaler` `minReplicas:` attribute supports being set to 0.
|
||||||
|
|
||||||
The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` kind. To scale from 0 whilst still being able to provision runners as jobs are queued we must use the `HorizontalRunnerAutoscaler` with only certain scaling configurations, only the below configurations support scaling from 0 whilst also being able to provision runners as jobs are queued:
|
The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` kind. To scale from 0 whilst still being able to provision runners as jobs are queued we must use the `HorizontalRunnerAutoscaler` with only certain scaling configurations, only the below configurations support scaling from 0 whilst also being able to provision runners as jobs are queued:
|
||||||
|
|
||||||
@@ -761,17 +912,17 @@ The main use case for scaling from 0 is with the `HorizontalRunnerAutoscaler` ki
|
|||||||
- `PercentageRunnersBusy` + Webhook-based autoscaling
|
- `PercentageRunnersBusy` + Webhook-based autoscaling
|
||||||
- Webhook-based autoscaling only
|
- Webhook-based autoscaling only
|
||||||
|
|
||||||
`PercentageRunnersBusy` can't be used alone as, by its definition, it needs one or more GitHub runners to become `busy` to be able to scale. If there isn't a runner to pick up a job and enter a `busy` state then the controller will never know to provision a runner to begin with as this metric has no knowledge of the job queue and is relying using the number of busy runners as a means for calculating the desired replica count.
|
`PercentageRunnersBusy` can't be used alone as, by its definition, it needs one or more GitHub runners to become `busy` to be able to scale. If there isn't a runner to pick up a job and enter a `busy` state then the controller will never know to provision a runner to begin with as this metric has no knowledge of the job queue and is relying on using the number of busy runners as a means for calculating the desired replica count.
|
||||||
|
|
||||||
If a HorizontalRunnerAutoscaler is configured with a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` then be aware that the controller will check the primary metric of `PercentageRunnersBusy` first and will only use the secondary metric to calculate the desired replica count if the primary metric returns 0 desired replicas.
|
If a HorizontalRunnerAutoscaler is configured with a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` then be aware that the controller will check the primary metric of `PercentageRunnersBusy` first and will only use the secondary metric to calculate the desired replica count if the primary metric returns 0 desired replicas.
|
||||||
|
|
||||||
Webhook-based autoscaling is the best option as it is relatively easy to configure and also it can scale scale quickly.
|
Webhook-based autoscaling is the best option as it is relatively easy to configure and also it can scale quickly.
|
||||||
|
|
||||||
#### Scheduled Overrides
|
#### Scheduled Overrides
|
||||||
|
|
||||||
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
> This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
|
||||||
|
|
||||||
`Scheduled Overrides` allows you to configure `HorizontalRunnerAutoscaler` so that its `spec:` gets updated only during a certain period of time. This feature is usually used for following scenarios:
|
`Scheduled Overrides` allows you to configure `HorizontalRunnerAutoscaler` so that its `spec:` gets updated only during a certain period of time. This feature is usually used for the following scenarios:
|
||||||
|
|
||||||
- You want to reduce your infrastructure costs by scaling your Kubernetes nodes down outside a given period
|
- You want to reduce your infrastructure costs by scaling your Kubernetes nodes down outside a given period
|
||||||
- You want to scale for scheduled spikes in workloads
|
- You want to scale for scheduled spikes in workloads
|
||||||
@@ -822,7 +973,7 @@ spec:
|
|||||||
minReplicas: 1
|
minReplicas: 1
|
||||||
```
|
```
|
||||||
|
|
||||||
A recurring override is initially active between `startTime` and `endTime`, and then it repeatedly get activated after a certain period of time denoted by `frequency`.
|
A recurring override is initially active between `startTime` and `endTime`, and then it repeatedly gets activated after a certain period of time denoted by `frequency`.
|
||||||
|
|
||||||
`frequency` can take one of the following values:
|
`frequency` can take one of the following values:
|
||||||
|
|
||||||
@@ -831,21 +982,21 @@ spec:
|
|||||||
- `Monthly`
|
- `Monthly`
|
||||||
- `Yearly`
|
- `Yearly`
|
||||||
|
|
||||||
By default, a scheduled override repeats forever. If you want it to repeat until a specific point in time, define `untilTime`. The controller create the last recurrence of the override until the recurrence's `startTime` is equal or earlier than `untilTime`.
|
By default, a scheduled override repeats forever. If you want it to repeat until a specific point in time, define `untilTime`. The controller creates the last recurrence of the override until the recurrence's `startTime` is equal or earlier than `untilTime`.
|
||||||
|
|
||||||
Do ensure that you have enough slack for `untilTime` so that a delayed or offline `actions-runner-controller` is much less likely to miss the last recurrence. For example, you might want to set `untilTime` to `M` minutes after the last recurrence's `startTime`, so that `actions-runner-controller` being offline up to `M` minutes doesn't miss the last recurrence.
|
Do ensure that you have enough slack for `untilTime` so that a delayed or offline `actions-runner-controller` is much less likely to miss the last recurrence. For example, you might want to set `untilTime` to `M` minutes after the last recurrence's `startTime`, so that `actions-runner-controller` being offline up to `M` minutes doesn't miss the last recurrence.
|
||||||
|
|
||||||
**Combining Multiple Scheduled Overrides**:
|
**Combining Multiple Scheduled Overrides**:
|
||||||
|
|
||||||
In case you have a more complex scenarios, try writing two or more entries under `scheduledOverrides`.
|
In case you have a more complex scenario, try writing two or more entries under `scheduledOverrides`.
|
||||||
|
|
||||||
The earlier entry is prioritized higher than later entries. So you usually define one-time overrides in the top of your list, then yearly, monthly, weekly, and lastly daily overrides.
|
The earlier entry is prioritized higher than later entries. So you usually define one-time overrides at the top of your list, then yearly, monthly, weekly, and lastly daily overrides.
|
||||||
|
|
||||||
A common use case for this may be to have 1 override to scale to 0 during the week outside of core business hours and another override to scale to 0 during all hours of the weekend.
|
A common use case for this may be to have 1 override to scale to 0 during the week outside of core business hours and another override to scale to 0 during all hours of the weekend.
|
||||||
|
|
||||||
### Runner with DinD
|
### Runner with DinD
|
||||||
|
|
||||||
When using default runner, runner pod starts up 2 containers: runner and DinD (Docker-in-Docker). This might create issues if there's `LimitRange` set to namespace.
|
When using the default runner, the runner pod starts up 2 containers: runner and DinD (Docker-in-Docker). This might create issues if there's `LimitRange` set to namespace.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# dindrunnerdeployment.yaml
|
# dindrunnerdeployment.yaml
|
||||||
@@ -950,7 +1101,7 @@ spec:
|
|||||||
# false (default) = Docker support is provided by a sidecar container deployed in the runner pod.
|
# false (default) = Docker support is provided by a sidecar container deployed in the runner pod.
|
||||||
# true = No docker sidecar container is deployed in the runner pod but docker can be used within the runner container instead. The image summerwind/actions-runner-dind is used by default.
|
# true = No docker sidecar container is deployed in the runner pod but docker can be used within the runner container instead. The image summerwind/actions-runner-dind is used by default.
|
||||||
dockerdWithinRunnerContainer: true
|
dockerdWithinRunnerContainer: true
|
||||||
#Optional environement variables for docker container
|
#Optional environment variables for docker container
|
||||||
# Valid only when dockerdWithinRunnerContainer=false
|
# Valid only when dockerdWithinRunnerContainer=false
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
- name: HTTP_PROXY
|
- name: HTTP_PROXY
|
||||||
@@ -992,7 +1143,7 @@ spec:
|
|||||||
- mountPath: /var/lib/docker
|
- mountPath: /var/lib/docker
|
||||||
name: docker-extra
|
name: docker-extra
|
||||||
# You can mount some of the shared volumes to the runner container using volumeMounts.
|
# You can mount some of the shared volumes to the runner container using volumeMounts.
|
||||||
# NOTE: Do not try to mount the volume onto the runner workdir itself as it will not work. You could mount it however on a sub directory in the runner workdir
|
# NOTE: Do not try to mount the volume onto the runner workdir itself as it will not work. You could mount it however on a subdirectory in the runner workdir
|
||||||
# Please see https://github.com/actions-runner-controller/actions-runner-controller/issues/630#issuecomment-862087323 for more information.
|
# Please see https://github.com/actions-runner-controller/actions-runner-controller/issues/630#issuecomment-862087323 for more information.
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /home/runner/work/repo
|
- mountPath: /home/runner/work/repo
|
||||||
@@ -1013,6 +1164,217 @@ spec:
|
|||||||
# This must match the name of a RuntimeClass resource available on the cluster.
|
# This must match the name of a RuntimeClass resource available on the cluster.
|
||||||
# More info: https://kubernetes.io/docs/concepts/containers/runtime-class
|
# More info: https://kubernetes.io/docs/concepts/containers/runtime-class
|
||||||
runtimeClassName: "runc"
|
runtimeClassName: "runc"
|
||||||
|
# This is an advanced configuration. Don't touch it unless you know what you're doing.
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
# Usually, the runner container's privileged field is derived from dockerdWithinRunnerContainer.
|
||||||
|
# But in the case where you need to run privileged job steps even if you don't use docker/don't need dockerd within the runner container,
|
||||||
|
# just specified `privileged: true` like this.
|
||||||
|
# See https://github.com/actions-runner-controller/actions-runner-controller/issues/1282
|
||||||
|
# Do note that specifying `privileged: false` while using dind is very likely to fail, even if you use some vm-based container runtimes
|
||||||
|
# like firecracker and kata. Basically they run containers within dedicated micro vms and so
|
||||||
|
# it's more like you can use `privileged: true` safer with those runtimes.
|
||||||
|
#
|
||||||
|
# privileged: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Custom Volume mounts
|
||||||
|
You can configure your own custom volume mounts. For example to have the work/docker data in memory or on NVME SSD, for
|
||||||
|
i/o intensive builds. Other custom volume mounts should be possible as well, see [kubernetes documentation](https://kubernetes.io/docs/concepts/storage/volumes/)
|
||||||
|
|
||||||
|
#### RAM Disk
|
||||||
|
|
||||||
|
Example of how to place the runner work dir, docker sidecar and /tmp within the runner onto a ramdisk.
|
||||||
|
```yaml
|
||||||
|
kind: RunnerDeployment
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
dockerVolumeMounts:
|
||||||
|
- mountPath: /var/lib/docker
|
||||||
|
name: docker
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
volumes:
|
||||||
|
- name: docker
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
- name: work # this volume gets automatically used up for the workdir
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
- name: tmp
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
ephemeral: true # recommended to not leak data between builds.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### NVME SSD
|
||||||
|
|
||||||
|
In this example we provide NVME backed storage for the workdir, docker sidecar and /tmp within the runner.
|
||||||
|
Here we use a working example on GKE, which will provide the NVME disk at /mnt/disks/ssd0. We will be placing the respective volumes in subdirs here and in order to be able to run multiple runners we will use the pod name as a prefix for subdirectories. Also the disk will fill up over time and disk space will not be freed until the node is removed.
|
||||||
|
|
||||||
|
**Beware** that running these persistent backend volumes **leaves data behind** between 2 different jobs on the workdir and `/tmp` with `ephemeral: false`.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kind: RunnerDeployment
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
env:
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
dockerVolumeMounts:
|
||||||
|
- mountPath: /var/lib/docker
|
||||||
|
name: docker
|
||||||
|
subPathExpr: $(POD_NAME)-docker
|
||||||
|
- mountPath: /runner/_work
|
||||||
|
name: work
|
||||||
|
subPathExpr: $(POD_NAME)-work
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /runner/_work
|
||||||
|
name: work
|
||||||
|
subPathExpr: $(POD_NAME)-work
|
||||||
|
- mountPath: /tmp
|
||||||
|
name: tmp
|
||||||
|
subPathExpr: $(POD_NAME)-tmp
|
||||||
|
dockerEnv:
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
volumes:
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/disks/ssd0
|
||||||
|
name: docker
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/disks/ssd0
|
||||||
|
name: work
|
||||||
|
- hostPath:
|
||||||
|
path: /mnt/disks/ssd0
|
||||||
|
name: tmp
|
||||||
|
ephemeral: true # VERY important. Otherwise data inside the workdir and /tmp is not cleared between builds
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Docker image layers caching
|
||||||
|
|
||||||
|
> **Note**: Ensure that the volume mount is added to the container that is running the Docker daemon.
|
||||||
|
|
||||||
|
`docker` stores pulled and built image layers in the [daemon's (note not client)](https://docs.docker.com/get-started/overview/#docker-architecture) [local storage area](https://docs.docker.com/storage/storagedriver/#sharing-promotes-smaller-images) which is usually at `/var/lib/docker`.
|
||||||
|
|
||||||
|
By leveraging RunnerSet's dynamic PV provisioning feature and your CSI driver, you can let ARC maintain a pool of PVs that are
|
||||||
|
reused across runner pods to retain `/var/lib/docker`.
|
||||||
|
|
||||||
|
_Be sure to add the volume mount to the container that is supposed to run the docker daemon._
|
||||||
|
|
||||||
|
By default, ARC creates a sidecar container named `docker` within the runner pod for running the docker daemon. In that case,
|
||||||
|
it's where you need the volume mount so that the manifest looks like:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: docker
|
||||||
|
volumeMounts:
|
||||||
|
- name: var-lib-docker
|
||||||
|
mountPath: /var/lib/docker
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: var-lib-docker
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: var-lib-docker
|
||||||
|
```
|
||||||
|
|
||||||
|
With `dockerdWithinRunnerContainer: true`, you need to add the volume mount to the `runner` container.
|
||||||
|
|
||||||
|
#### Go module and build caching
|
||||||
|
|
||||||
|
`Go` is known to cache builds under `$HOME/.cache/go-build` and downloaded modules under `$HOME/pkg/mod`.
|
||||||
|
The module cache dir can be customized by setting `GOMODCACHE` so by setting it to somewhere under `$HOME/.cache`,
|
||||||
|
we can have a single PV to host both build and module cache, which might improve Go module downloading and building time.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
env:
|
||||||
|
- name: GOMODCACHE
|
||||||
|
value: "/home/runner/.cache/go-mod"
|
||||||
|
volumeMounts:
|
||||||
|
- name: cache
|
||||||
|
mountPath: "/home/runner/.cache"
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: cache
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: cache
|
||||||
|
```
|
||||||
|
|
||||||
|
#### PV-backed runner work directory
|
||||||
|
|
||||||
|
ARC works by automatically creating runner pods for running [`actions/runner`](https://github.com/actions/runner) and [running `config.sh`](https://docs.github.com/en/actions/hosting-your-own-runners/adding-self-hosted-runners#adding-a-self-hosted-runner-to-a-repository) which you had to run manually without ARC.
|
||||||
|
|
||||||
|
`config.sh` is the script provided by `actions/runner` to pre-configure the runner process before being started. One of the options provided by `config.sh` is `--work`,
|
||||||
|
which specifies the working directory where the runner runs your workflow jobs in.
|
||||||
|
|
||||||
|
The volume and the partition that hosts the work directory should have several or dozens of GBs free space that might be used by your workflow jobs.
|
||||||
|
|
||||||
|
By default, ARC uses `/runner/_work` as the work directory, which is powered by Kubernetes's `emptyDir`. [`emptyDir` is usually backed by a directory created within a host's volume](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir), somewhere under `/var/lib/kubernetes/pods`. Therefore
|
||||||
|
your host's volume that is backing `/var/lib/kubernetes/pods` must have enough free space to serve all the concurrent runner pods that might be deployed onto your host at the same time.
|
||||||
|
|
||||||
|
So, in case you see a job failure seemingly due to "disk full", it's very likely you need to reconfigure your host to have more free space.
|
||||||
|
|
||||||
|
In case you can't rely on the host's volume, consider using `RunnerSet` and backing the work directory with an ephemeral PV.
|
||||||
|
|
||||||
|
Kubernetes 1.23 or greater provides the support for [generic ephemeral volumes](https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes), which is designed to support this exact use-case. It's defined in the Pod spec API so it isn't currently available for `RunnerDeployment`. `RunnerSet` is based on Kubernetes' `StatefulSet` which mostly embeds the Pod spec under `spec.template.spec`, so there you go.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: example
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /runner/_work
|
||||||
|
name: work
|
||||||
|
- name: docker
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /runner/_work
|
||||||
|
name: work
|
||||||
|
volumes:
|
||||||
|
- name: work
|
||||||
|
ephemeral:
|
||||||
|
volumeClaimTemplate:
|
||||||
|
spec:
|
||||||
|
accessModes: [ "ReadWriteOnce" ]
|
||||||
|
storageClassName: "runner-work-dir"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
```
|
```
|
||||||
|
|
||||||
### Runner Labels
|
### Runner Labels
|
||||||
@@ -1071,6 +1433,19 @@ spec:
|
|||||||
group: NewGroup
|
group: NewGroup
|
||||||
```
|
```
|
||||||
|
|
||||||
|
GitHub supports custom visibility in a Runner Group to make it available to a specific set of repositories only. By default if no GitHub
|
||||||
|
authentication is included in the webhook server, ARC will assume that all runner groups are usable in all repositories.
|
||||||
|
Currently, GitHub does not include the repository runner group membership information in the workflow_job event (or any webhook). To make the ARC "runner group aware" additional GitHub API calls are needed to find out what runner groups are visible to the webhook's repository. This behaviour will impact your rate-limit budget and so the option needs to be explicitly configured by the end user.
|
||||||
|
|
||||||
|
This option will be enabled when proper GitHub authentication options (token, app or basic auth) are provided in the webhook server and `useRunnerGroupsVisibility` is set to true, e.g.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
githubWebhookServer:
|
||||||
|
enabled: false
|
||||||
|
replicaCount: 1
|
||||||
|
useRunnerGroupsVisibility: true
|
||||||
|
```
|
||||||
|
|
||||||
### Runner Entrypoint Features
|
### Runner Entrypoint Features
|
||||||
|
|
||||||
> Environment variable values must all be strings
|
> Environment variable values must all be strings
|
||||||
@@ -1095,16 +1470,22 @@ spec:
|
|||||||
# Disables automatic runner updates
|
# Disables automatic runner updates
|
||||||
- name: DISABLE_RUNNER_UPDATE
|
- name: DISABLE_RUNNER_UPDATE
|
||||||
value: "true"
|
value: "true"
|
||||||
|
# Configure runner with legacy --once instead of --ephemeral flag
|
||||||
|
# WARNING | THIS ENV VAR IS DEPRECATED AND WILL BE REMOVED
|
||||||
|
# IN A FUTURE VERSION OF ARC.
|
||||||
|
# THIS ENV VAR WILL BE REMOVED, SEE ISSUE #1196 FOR DETAILS
|
||||||
|
- name: RUNNER_FEATURE_FLAG_ONCE
|
||||||
|
value: "true"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Using IRSA (IAM Roles for Service Accounts) in EKS
|
### Using IRSA (IAM Roles for Service Accounts) in EKS
|
||||||
|
|
||||||
> This feature requires controller version => [v0.15.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.15.0)
|
> This feature requires controller version => [v0.15.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.15.0)
|
||||||
|
|
||||||
As similar as for regular pods and deployments, you firstly need an existing service account with the IAM role associated.
|
Similar to regular pods and deployments, you firstly need an existing service account with the IAM role associated.
|
||||||
Create one using e.g. `eksctl`. You can refer to [the EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) for more details.
|
Create one using e.g. `eksctl`. You can refer to [the EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) for more details.
|
||||||
|
|
||||||
Once you set up the service account, all you need is to add `serviceAccountName` and `fsGroup` to any pods that uses the IAM-role enabled service account.
|
Once you set up the service account, all you need is to add `serviceAccountName` and `fsGroup` to any pods that use the IAM-role enabled service account.
|
||||||
|
|
||||||
For `RunnerDeployment`, you can set those two fields under the runner spec at `RunnerDeployment.Spec.Template`:
|
For `RunnerDeployment`, you can set those two fields under the runner spec at `RunnerDeployment.Spec.Template`:
|
||||||
|
|
||||||
@@ -1121,141 +1502,13 @@ spec:
|
|||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 1000
|
fsGroup: 1000
|
||||||
```
|
```
|
||||||
|
|
||||||
### Stateful Runners
|
|
||||||
|
|
||||||
> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
|
|
||||||
|
|
||||||
`actions-runner-controller` supports the `RunnerSet` API that lets you deploy stateful runners. A stateful runner is designed to be able to store data that persists across GitHub Actions workflow and job runs. You might find it useful, for example, to speed up your docker builds by persisting the docker layer cache.
|
|
||||||
|
|
||||||
A basic `RunnerSet` would look like this:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: RunnerSet
|
|
||||||
metadata:
|
|
||||||
name: example
|
|
||||||
spec:
|
|
||||||
ephemeral: false
|
|
||||||
replicas: 2
|
|
||||||
repository: mumoshu/actions-runner-controller-ci
|
|
||||||
# Other mandatory fields from StatefulSet
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: example
|
|
||||||
serviceName: example
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: example
|
|
||||||
```
|
|
||||||
|
|
||||||
As it is based on `StatefulSet`, `selector` and `template.metadata.labels` need to be defined and have the exact same set of labels. `serviceName` must be set to some non-empty string as it is also required by `StatefulSet`.
|
|
||||||
|
|
||||||
Runner-related fields like `ephemeral`, `repository`, `organization`, `enterprise`, and so on should be written directly under `spec`.
|
|
||||||
|
|
||||||
Fields like `volumeClaimTemplates` that originate from `StatefulSet` should also be written directly under `spec`.
|
|
||||||
|
|
||||||
Pod-related fields like security contexts and volumes are written under `spec.template.spec` like `StatefulSet`.
|
|
||||||
|
|
||||||
Similarly, container-related fields like resource requests and limits, container image names and tags, security context, and so on are written under `spec.template.spec.containers`. There are two reserved container names, `runner` and `docker`. The former is for the container that runs [actions runner](https://github.com/actions/runner) and the latter is for the container that runs a dockerd.
|
|
||||||
|
|
||||||
For a more complex example, see the below:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: RunnerSet
|
|
||||||
metadata:
|
|
||||||
name: example
|
|
||||||
spec:
|
|
||||||
ephemeral: false
|
|
||||||
replicas: 2
|
|
||||||
repository: mumoshu/actions-runner-controller-ci
|
|
||||||
dockerdWithinRunnerContainer: true
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
securityContext:
|
|
||||||
#All level/role/type/user values will vary based on your SELinux policies.
|
|
||||||
#See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_atomic_host/7/html/container_security_guide/docker_selinux_security_policy for information about SELinux with containers
|
|
||||||
seLinuxOptions:
|
|
||||||
level: "s0"
|
|
||||||
role: "system_r"
|
|
||||||
type: "super_t"
|
|
||||||
user: "system_u"
|
|
||||||
containers:
|
|
||||||
- name: runner
|
|
||||||
env: []
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: "4.0"
|
|
||||||
memory: "8Gi"
|
|
||||||
requests:
|
|
||||||
cpu: "2.0"
|
|
||||||
memory: "4Gi"
|
|
||||||
- name: docker
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: "4.0"
|
|
||||||
memory: "8Gi"
|
|
||||||
requests:
|
|
||||||
cpu: "2.0"
|
|
||||||
memory: "4Gi"
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also read the design and usage documentation written in the original pull request that introduced `RunnerSet` for more information.
|
|
||||||
|
|
||||||
https://github.com/actions-runner-controller/actions-runner-controller/pull/629
|
|
||||||
|
|
||||||
Under the hood, `RunnerSet` relies on Kubernetes's `StatefulSet` and Mutating Webhook. A statefulset is used to create a number of pods that has stable names and dynamically provisioned persistent volumes, so that each statefulset-managed pod gets the same persistent volume even after restarting. A mutating webhook is used to dynamically inject a runner's "registration token" which is used to call GitHub's "Create Runner" API.
|
|
||||||
|
|
||||||
We envision that `RunnerSet` will eventually replace `RunnerDeployment`, as `RunnerSet` provides a more standard API that is easy to learn and use because it is based on `StatefulSet`, and it has support for `volumeClaimTemplates` which is crucial to manage dynamically provisioned persistent volumes.
|
|
||||||
|
|
||||||
**Limitations**
|
|
||||||
|
|
||||||
* For autoscaling the `RunnerSet` kind only supports pull driven scaling or the `workflow_job` event for webhook driven scaling.
|
|
||||||
* For autoscaling the `RunnerSet` kind doesn't support the [registration-only runner](#autoscaling-tofrom-0), these are deprecated however and to be [removed](https://github.com/actions-runner-controller/actions-runner-controller/issues/859)
|
|
||||||
* A known down-side of relying on `StatefulSet` is that it misses a support for `maxUnavailable`. A `StatefulSet` basically works like `maxUnavailable: 1` in `Deployment`, which means that it can take down only one pod concurrently while doing a rolling-update of pods. Kubernetes 1.22 doesn't support customizing it yet so probably it takes more releases to arrive. See https://github.com/kubernetes/kubernetes/issues/68397 for more information.
|
|
||||||
|
|
||||||
### Ephemeral Runners
|
|
||||||
|
|
||||||
Both `RunnerDeployment` and `RunnerSet` have the ability to configure `ephemeral: true` in the spec.
|
|
||||||
|
|
||||||
When it is configured, it passes a `--once` flag to every runner.
|
|
||||||
|
|
||||||
`--once` is an experimental `actions/runner` feature that instructs the runner to stop after the first job run. It has a known race condition issue that means the runner may fetch a job even when it's being terminated. If a runner fetched a job while terminating, the job is very likely to fail because the terminating runner doesn't wait for the job to complete. This is tracked in issue [#466](https://github.com/actions-runner-controller/actions-runner-controller/issues/466).
|
|
||||||
|
|
||||||
Since the implementation of the `--once` flag, GitHub has implemented the `--ephemeral` flag, which has no known race conditions and is much better supported by GitHub; this is the preferred flag for ephemeral runners. To have your `RunnerDeployment` and `RunnerSet` kinds use this new flag instead of the `--once` flag, set `RUNNER_FEATURE_FLAG_EPHEMERAL` to `"true"`. For example, a `RunnerSet` configured to use the new flag looks like:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
kind: RunnerSet
|
|
||||||
metadata:
|
|
||||||
name: example-runnerset
|
|
||||||
spec:
|
|
||||||
# ...
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: example-runnerset
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: runner
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
env:
|
|
||||||
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
|
|
||||||
value: "true"
|
|
||||||
```
|
|
||||||
|
|
||||||
You should configure all your ephemeral runners to use the new flag unless you have a reason for needing to use the old flag.
|
|
||||||
|
|
||||||
Once able, `actions-runner-controller` will make `--ephemeral` the default option for `ephemeral: true` runners and potentially remove `--once` entirely. It is likely that in the future the `--once` flag will be officially deprecated by GitHub and subsequently removed in `actions/runner`.
|
|
||||||
|
|
||||||
### Software Installed in the Runner Image
|
### Software Installed in the Runner Image
|
||||||
|
|
||||||
**Cloud Tooling**<br />
|
**Cloud Tooling**<br />
|
||||||
The project supports being deployed on the various cloud Kubernetes platforms (e.g. EKS), it does not however aim to go beyond that. No cloud specific tooling is bundled in the base runner, this is an active decision to keep the overhead of maintaining the solution manageable.
|
The project supports being deployed on the various cloud Kubernetes platforms (e.g. EKS), it does not however aim to go beyond that. No cloud specific tooling is bundled in the base runner, this is an active decision to keep the overhead of maintaining the solution manageable.
|
||||||
|
|
||||||
**Bundled Software**<br />
|
**Bundled Software**<br />
|
||||||
The GitHub hosted runners include a large amount of pre-installed software packages. GitHub maintain a list in README files at <https://github.com/actions/virtual-environments/tree/main/images/linux>
|
The GitHub hosted runners include a large amount of pre-installed software packages. GitHub maintains a list in README files at <https://github.com/actions/virtual-environments/tree/main/images/linux>
|
||||||
|
|
||||||
This solution maintains a few runner images with `latest` aligning with GitHub's Ubuntu version, these images do not contain all of the software installed on the GitHub runners. The images contain the following subset of packages from the GitHub runners:
|
This solution maintains a few runner images with `latest` aligning with GitHub's Ubuntu version, these images do not contain all of the software installed on the GitHub runners. The images contain the following subset of packages from the GitHub runners:
|
||||||
|
|
||||||
@@ -1266,7 +1519,7 @@ This solution maintains a few runner images with `latest` aligning with GitHub's
|
|||||||
|
|
||||||
The virtual environments from GitHub contain a lot more software packages (different versions of Java, Node.js, Golang, .NET, etc) which are not provided in the runner image. Most of these have dedicated setup actions which allow the tools to be installed on-demand in a workflow, for example: `actions/setup-java` or `actions/setup-node`
|
The virtual environments from GitHub contain a lot more software packages (different versions of Java, Node.js, Golang, .NET, etc) which are not provided in the runner image. Most of these have dedicated setup actions which allow the tools to be installed on-demand in a workflow, for example: `actions/setup-java` or `actions/setup-node`
|
||||||
|
|
||||||
If there is a need to include packages in the runner image for which there is no setup action, then this can be achieved by building a custom container image for the runner. The easiest way is to start with the `summerwind/actions-runner` image and installing the extra dependencies directly in the docker image:
|
If there is a need to include packages in the runner image for which there is no setup action, then this can be achieved by building a custom container image for the runner. The easiest way is to start with the `summerwind/actions-runner` image and then install the extra dependencies directly in the docker image:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
FROM summerwind/actions-runner:latest
|
FROM summerwind/actions-runner:latest
|
||||||
@@ -1319,7 +1572,7 @@ $ helm --upgrade install actions-runner-controller/actions-runner-controller \
|
|||||||
|
|
||||||
# Troubleshooting
|
# Troubleshooting
|
||||||
|
|
||||||
See [troubleshooting guide](TROUBLESHOOTING.md) for solutions to various problems people have ran into consistently.
|
See [troubleshooting guide](TROUBLESHOOTING.md) for solutions to various problems people have run into consistently.
|
||||||
|
|
||||||
# Contributing
|
# Contributing
|
||||||
|
|
||||||
|
|||||||
22
SECURITY.md
Normal file
22
SECURITY.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Security Policy
|
||||||
|
|
||||||
|
## Sponsoring the project
|
||||||
|
|
||||||
|
This project is maintained by a small team of two and therefore lacks the resources to provide security fixes in a timely manner.
|
||||||
|
|
||||||
|
If you have important business(es) that rely on this project, please consider sponsoring the project so that the maintainer(s) can commit to providing such service.
|
||||||
|
|
||||||
|
Please refer to https://github.com/sponsors/actions-runner-controller for available tiers.
|
||||||
|
|
||||||
|
## Supported Versions
|
||||||
|
|
||||||
|
| Version | Supported |
|
||||||
|
| ------- | ------------------ |
|
||||||
|
| 0.23.0 | :white_check_mark: |
|
||||||
|
| < 0.23.0| :x: |
|
||||||
|
|
||||||
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
|
To report a security issue, please email ykuoka+arcsecurity(at)gmail.com with a description of the issue, the steps you took to create the issue, affected versions, and, if known, mitigations for the issue.
|
||||||
|
|
||||||
|
A maintainer will try to respond within 5 working days. If the issue is confirmed as a vulnerability, a Security Advisory will be opened. This project tries to follow a 90 day disclosure timeline.
|
||||||
@@ -1,10 +1,28 @@
|
|||||||
# Troubleshooting
|
# Troubleshooting
|
||||||
|
|
||||||
* [Invalid header field value](#invalid-header-field-value)
|
* [Tools](#tools)
|
||||||
* [Runner coming up before network available](#runner-coming-up-before-network-available)
|
* [Installation](#installation)
|
||||||
* [Deployment fails on GKE due to webhooks](#deployment-fails-on-gke-due-to-webhooks)
|
* [Invalid header field value](#invalid-header-field-value)
|
||||||
|
* [Deployment fails on GKE due to webhooks](#deployment-fails-on-gke-due-to-webhooks)
|
||||||
|
* [Operations](#operations)
|
||||||
|
* [Stuck runner kind or backing pod](#stuck-runner-kind-or-backing-pod)
|
||||||
|
* [Delay in jobs being allocated to runners](#delay-in-jobs-being-allocated-to-runners)
|
||||||
|
* [Runner coming up before network available](#runner-coming-up-before-network-available)
|
||||||
|
* [Outgoing network action hangs indefinitely](#outgoing-network-action-hangs-indefinitely)
|
||||||
|
* [Unable to scale to zero with TotalNumberOfQueuedAndInProgressWorkflowRuns](#unable-to-scale-to-zero-with-totalnumberofqueuedandinprogressworkflowruns)
|
||||||
|
|
||||||
## Invalid header field value
|
## Tools
|
||||||
|
|
||||||
|
A list of tools which are helpful for troubleshooting
|
||||||
|
|
||||||
|
* https://github.com/rewanthtammana/kubectl-fields Kubernetes resources hierarchy parsing tool
|
||||||
|
* https://github.com/stern/stern Multi pod and container log tailing for Kubernetes
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Troubleshooting runbooks that relate to ARC installation problems
|
||||||
|
|
||||||
|
### Invalid header field value
|
||||||
|
|
||||||
**Problem**
|
**Problem**
|
||||||
|
|
||||||
@@ -23,7 +41,103 @@ Your base64'ed PAT token has a new line at the end, it needs to be created witho
|
|||||||
* `echo -n $TOKEN | base64`
|
* `echo -n $TOKEN | base64`
|
||||||
* Create the secret as described in the docs using the shell and documented flags
|
* Create the secret as described in the docs using the shell and documented flags
|
||||||
|
|
||||||
## Runner coming up before network available
|
|
||||||
|
### Deployment fails on GKE due to webhooks
|
||||||
|
|
||||||
|
**Problem**
|
||||||
|
|
||||||
|
Due to GKEs firewall settings you may run into the following errors when trying to deploy runners on a private GKE cluster:
|
||||||
|
|
||||||
|
```
|
||||||
|
Internal error occurred: failed calling webhook "mutate.runner.actions.summerwind.dev":
|
||||||
|
Post https://webhook-service.actions-runner-system.svc:443/mutate-actions-summerwind-dev-v1alpha1-runner?timeout=10s:
|
||||||
|
context deadline exceeded
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution**<br />
|
||||||
|
|
||||||
|
To fix this, you may either:
|
||||||
|
|
||||||
|
1. Configure the webhook to use another port, such as 443 or 10250, [each of
|
||||||
|
which allow traffic by default](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules).
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# With helm, you'd set `webhookPort` to the port number of your choice
|
||||||
|
# See https://github.com/actions-runner-controller/actions-runner-controller/pull/1410/files for more information
|
||||||
|
helm upgrade --install --namespace actions-runner-system --create-namespace \
|
||||||
|
--wait actions-runner-controller actions-runner-controller/actions-runner-controller \
|
||||||
|
--set webhookPort=10250
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Set up a firewall rule to allow the master node to connect to the default
|
||||||
|
webhook port. The exact way to do this may vary, but the following script
|
||||||
|
should point you in the right direction:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# 1) Retrieve the network tag automatically given to the worker nodes
|
||||||
|
# NOTE: this only works if you have only one cluster in your GCP project. You will have to manually inspect the result of this command to find the tag for the cluster you want to target
|
||||||
|
WORKER_NODES_TAG=$(gcloud compute instances list --format='text(tags.items[0])' --filter='metadata.kubelet-config:*' | grep tags | awk '{print $2}' | sort | uniq)
|
||||||
|
|
||||||
|
# 2) Take note of the VPC network in which you deployed your cluster
|
||||||
|
# NOTE this only works if you have only one network in which you deploy your clusters
|
||||||
|
NETWORK=$(gcloud compute instances list --format='text(networkInterfaces[0].network)' --filter='metadata.kubelet-config:*' | grep networks | awk -F'/' '{print $NF}' | sort | uniq)
|
||||||
|
|
||||||
|
# 3) Get the master source ip block
|
||||||
|
SOURCE=$(gcloud container clusters describe <cluster-name> --region <region> | grep masterIpv4CidrBlock| cut -d ':' -f 2 | tr -d ' ')
|
||||||
|
|
||||||
|
gcloud compute firewall-rules create k8s-cert-manager --source-ranges $SOURCE --target-tags $WORKER_NODES_TAG --allow TCP:9443 --network $NETWORK
|
||||||
|
```
|
||||||
|
|
||||||
|
## Operations
|
||||||
|
|
||||||
|
Troubleshooting runbooks that relate to ARC operational problems
|
||||||
|
|
||||||
|
### Stuck runner kind or backing pod
|
||||||
|
|
||||||
|
**Problem**
|
||||||
|
|
||||||
|
Sometimes either the runner kind (`kubectl get runners`) or its underlying pod can get stuck in a terminating state for various reasons. You can get the kind unstuck by removing its finaliser using something like this:
|
||||||
|
|
||||||
|
**Solution**
|
||||||
|
|
||||||
|
Remove the finaliser from the relevant runner kind or pod
|
||||||
|
|
||||||
|
```
|
||||||
|
# Get all kind runners and remove the finalizer
|
||||||
|
$ kubectl get runners --no-headers | awk {'print $1'} | xargs kubectl patch runner --type merge -p '{"metadata":{"finalizers":null}}'
|
||||||
|
|
||||||
|
# Get all pods that are stuck terminating and remove the finalizer
|
||||||
|
$ kubectl -n get pods | grep Terminating | awk {'print $1'} | xargs kubectl patch pod -p '{"metadata":{"finalizers":null}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
_Note the code assumes you have already selected the namespace your runners are in and that they
|
||||||
|
are in a namespace not shared with anything else_
|
||||||
|
|
||||||
|
### Delay in jobs being allocated to runners
|
||||||
|
|
||||||
|
**Problem**
|
||||||
|
|
||||||
|
ARC isn't involved in jobs actually getting allocated to a runner. ARC is responsible for orchestrating runners and the runner lifecycle. Why some people see large delays in job allocation is not clear; however, it has been [reported](https://github.com/actions-runner-controller/actions-runner-controller/issues/1387#issuecomment-1122593984) that this is somehow caused by the self-update process.
|
||||||
|
|
||||||
|
**Solution**
|
||||||
|
|
||||||
|
Disable the self-update process in your runner manifests
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerDeployment
|
||||||
|
metadata:
|
||||||
|
name: example-runnerdeployment-with-sleep
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
...
|
||||||
|
env:
|
||||||
|
- name: DISABLE_RUNNER_UPDATE
|
||||||
|
value: "true"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Runner coming up before network available
|
||||||
|
|
||||||
**Problem**
|
**Problem**
|
||||||
|
|
||||||
@@ -61,40 +175,48 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
...
|
||||||
env:
|
env:
|
||||||
# This runner's entrypoint script will have a 5 seconds delay
|
|
||||||
# as a first action within the entrypoint script
|
|
||||||
- name: STARTUP_DELAY_IN_SECONDS
|
- name: STARTUP_DELAY_IN_SECONDS
|
||||||
value: "5"
|
value: "5"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Deployment fails on GKE due to webhooks
|
## Outgoing network action hangs indefinitely
|
||||||
|
|
||||||
**Problem**
|
**Problem**
|
||||||
|
|
||||||
Due to GKEs firewall settings you may run into the following errors when trying to deploy runners on a private GKE cluster:
|
Some random outgoing network actions hangs indefinitely. This could be because your cluster does not give Docker the standard MTU of 1500, you can check this out by running `ip link` in a pod that encounters the problem and reading the outgoing interface's MTU value. If it is smaller than 1500, then try the following.
|
||||||
|
|
||||||
```
|
**Solution**
|
||||||
Internal error occurred: failed calling webhook "mutate.runner.actions.summerwind.dev":
|
|
||||||
Post https://webhook-service.actions-runner-system.svc:443/mutate-actions-summerwind-dev-v1alpha1-runner?timeout=10s:
|
Add a `dockerMTU` key in your runner's spec with the value you read on the outgoing interface. For instance:
|
||||||
context deadline exceeded
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerDeployment
|
||||||
|
metadata:
|
||||||
|
name: github-runner
|
||||||
|
namespace: github-system
|
||||||
|
spec:
|
||||||
|
replicas: 6
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
dockerMTU: 1400
|
||||||
|
repository: $username/$repo
|
||||||
|
env: []
|
||||||
```
|
```
|
||||||
|
|
||||||
**Solution**<br />
|
There may be more places you need to tweak for MTU.
|
||||||
|
Please consult issues like #651 for more information.
|
||||||
|
|
||||||
To fix this, you need to set up a firewall rule to allow the master node to connect to the webhook port.
|
## Unable to scale to zero with TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||||
The exact way to do this may vary, but the following script should point you in the right direction:
|
|
||||||
|
|
||||||
```
|
**Problem**
|
||||||
# 1) Retrieve the network tag automatically given to the worker nodes
|
|
||||||
# NOTE: this only works if you have only one cluster in your GCP project. You will have to manually inspect the result of this command to find the tag for the cluster you want to target
|
|
||||||
WORKER_NODES_TAG=$(gcloud compute instances list --format='text(tags.items[0])' --filter='metadata.kubelet-config:*' | grep tags | awk '{print $2}' | sort | uniq)
|
|
||||||
|
|
||||||
# 2) Take note of the VPC network in which you deployed your cluster
|
HRA doesn't scale the RunnerDeployment to zero, even though you did configure HRA correctly, to have a pull-based scaling metric `TotalNumberOfQueuedAndInProgressWorkflowRuns`, and set `minReplicas: 0`.
|
||||||
# NOTE this only works if you have only one network in which you deploy your clusters
|
|
||||||
NETWORK=$(gcloud compute instances list --format='text(networkInterfaces[0].network)' --filter='metadata.kubelet-config:*' | grep networks | awk -F'/' '{print $NF}' | sort | uniq)
|
|
||||||
|
|
||||||
# 3) Get the master source ip block
|
**Solution**
|
||||||
SOURCE=$(gcloud container clusters describe <cluster-name> --region <region> | grep masterIpv4CidrBlock| cut -d ':' -f 2 | tr -d ' ')
|
|
||||||
gcloud compute firewall-rules create k8s-cert-manager --source-ranges $SOURCE --target-tags $WORKER_NODES_TAG --allow TCP:9443 --network $NETWORK
|
You very likely have some dangling workflow jobs stuck in `queued` or `in_progress` as seen in [#1057](https://github.com/actions-runner-controller/actions-runner-controller/issues/1057#issuecomment-1133439061).
|
||||||
```
|
|
||||||
|
Manually call [the "list workflow runs" API](https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-repository), and [remove the dangling workflow job(s)](https://docs.github.com/en/rest/actions/workflow-runs#delete-a-workflow-run).
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ tpe=${ACCEPTANCE_TEST_SECRET_TYPE}
|
|||||||
|
|
||||||
VALUES_FILE=${VALUES_FILE:-$(dirname $0)/values.yaml}
|
VALUES_FILE=${VALUES_FILE:-$(dirname $0)/values.yaml}
|
||||||
|
|
||||||
|
kubectl delete secret -n actions-runner-system controller-manager || :
|
||||||
|
|
||||||
if [ "${tpe}" == "token" ]; then
|
if [ "${tpe}" == "token" ]; then
|
||||||
if ! kubectl get secret controller-manager -n actions-runner-system >/dev/null; then
|
if ! kubectl get secret controller-manager -n actions-runner-system >/dev/null; then
|
||||||
kubectl create secret generic controller-manager \
|
kubectl create secret generic controller-manager \
|
||||||
@@ -16,16 +18,29 @@ elif [ "${tpe}" == "app" ]; then
|
|||||||
kubectl create secret generic controller-manager \
|
kubectl create secret generic controller-manager \
|
||||||
-n actions-runner-system \
|
-n actions-runner-system \
|
||||||
--from-literal=github_app_id=${APP_ID:?must not be empty} \
|
--from-literal=github_app_id=${APP_ID:?must not be empty} \
|
||||||
--from-literal=github_app_installation_id=${INSTALLATION_ID:?must not be empty} \
|
--from-literal=github_app_installation_id=${APP_INSTALLATION_ID:?must not be empty} \
|
||||||
--from-file=github_app_private_key=${PRIVATE_KEY_FILE_PATH:?must not be empty}
|
--from-file=github_app_private_key=${APP_PRIVATE_KEY_FILE:?must not be empty}
|
||||||
else
|
else
|
||||||
echo "ACCEPTANCE_TEST_SECRET_TYPE must be set to either \"token\" or \"app\"" 1>&2
|
echo "ACCEPTANCE_TEST_SECRET_TYPE must be set to either \"token\" or \"app\"" 1>&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "${WEBHOOK_GITHUB_TOKEN}" ]; then
|
||||||
|
kubectl -n actions-runner-system delete secret \
|
||||||
|
github-webhook-server || :
|
||||||
|
kubectl -n actions-runner-system create secret generic \
|
||||||
|
github-webhook-server \
|
||||||
|
--from-literal=github_token=${WEBHOOK_GITHUB_TOKEN:?WEBHOOK_GITHUB_TOKEN must not be empty}
|
||||||
|
else
|
||||||
|
echo 'Skipped deploying secret "github-webhook-server". Set WEBHOOK_GITHUB_TOKEN to deploy.' 1>&2
|
||||||
|
fi
|
||||||
|
|
||||||
tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL}
|
tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL}
|
||||||
|
|
||||||
|
TEST_ID=${TEST_ID:-default}
|
||||||
|
|
||||||
if [ "${tool}" == "helm" ]; then
|
if [ "${tool}" == "helm" ]; then
|
||||||
|
set -v
|
||||||
helm upgrade --install actions-runner-controller \
|
helm upgrade --install actions-runner-controller \
|
||||||
charts/actions-runner-controller \
|
charts/actions-runner-controller \
|
||||||
-n actions-runner-system \
|
-n actions-runner-system \
|
||||||
@@ -34,42 +49,83 @@ if [ "${tool}" == "helm" ]; then
|
|||||||
--set authSecret.create=false \
|
--set authSecret.create=false \
|
||||||
--set image.repository=${NAME} \
|
--set image.repository=${NAME} \
|
||||||
--set image.tag=${VERSION} \
|
--set image.tag=${VERSION} \
|
||||||
|
--set podAnnotations.test-id=${TEST_ID} \
|
||||||
|
--set githubWebhookServer.podAnnotations.test-id=${TEST_ID} \
|
||||||
-f ${VALUES_FILE}
|
-f ${VALUES_FILE}
|
||||||
kubectl apply -f charts/actions-runner-controller/crds
|
set +v
|
||||||
kubectl -n actions-runner-system wait deploy/actions-runner-controller --for condition=available --timeout 60s
|
# To prevent `CustomResourceDefinition.apiextensions.k8s.io "runners.actions.summerwind.dev" is invalid: metadata.annotations: Too long: must have at most 262144 bytes`
|
||||||
|
# errors
|
||||||
|
kubectl create -f charts/actions-runner-controller/crds || kubectl replace -f charts/actions-runner-controller/crds
|
||||||
|
# This wait fails due to timeout when it's already in crashloopback and this update doesn't change the image tag.
|
||||||
|
# That's why we add `|| :`. With that we prevent stopping the script in case of timeout and
|
||||||
|
# proceed to delete (possibly in crashloopback and/or running with outdated image) pods so that they are recreated by K8s.
|
||||||
|
kubectl -n actions-runner-system wait deploy/actions-runner-controller --for condition=available --timeout 60s || :
|
||||||
else
|
else
|
||||||
kubectl apply \
|
kubectl apply \
|
||||||
-n actions-runner-system \
|
-n actions-runner-system \
|
||||||
-f release/actions-runner-controller.yaml
|
-f release/actions-runner-controller.yaml
|
||||||
kubectl -n actions-runner-system wait deploy/controller-manager --for condition=available --timeout 120s
|
kubectl -n actions-runner-system wait deploy/controller-manager --for condition=available --timeout 120s || :
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Restart all ARC pods
|
||||||
|
kubectl -n actions-runner-system delete po -l app.kubernetes.io/name=actions-runner-controller
|
||||||
|
|
||||||
|
echo Waiting for all ARC pods to be up and running after restart
|
||||||
|
|
||||||
|
kubectl -n actions-runner-system wait deploy/actions-runner-controller --for condition=available --timeout 120s
|
||||||
|
|
||||||
# Adhocly wait for some time until actions-runner-controller's admission webhook gets ready
|
# Adhocly wait for some time until actions-runner-controller's admission webhook gets ready
|
||||||
sleep 20
|
sleep 20
|
||||||
|
|
||||||
RUNNER_LABEL=${RUNNER_LABEL:-self-hosted}
|
RUNNER_LABEL=${RUNNER_LABEL:-self-hosted}
|
||||||
|
|
||||||
if [ -n "${TEST_REPO}" ]; then
|
if [ -n "${TEST_REPO}" ]; then
|
||||||
if [ -n "USE_RUNNERSET" ]; then
|
if [ "${USE_RUNNERSET}" != "false" ]; then
|
||||||
cat acceptance/testdata/repo.runnerset.yaml | envsubst | kubectl apply -f -
|
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ENTERPRISE= TEST_ORG= RUNNER_MIN_REPLICAS=${REPO_RUNNER_MIN_REPLICAS} NAME=repo-runnerset envsubst | kubectl apply -f -
|
||||||
cat acceptance/testdata/repo.runnerset.hra.yaml | envsubst | kubectl apply -f -
|
|
||||||
else
|
else
|
||||||
echo 'Deploying runnerdeployment and hra. Set USE_RUNNERSET if you want to deploy runnerset instead.'
|
echo 'Deploying runnerdeployment and hra. Set USE_RUNNERSET if you want to deploy runnerset instead.'
|
||||||
cat acceptance/testdata/repo.runnerdeploy.yaml | envsubst | kubectl apply -f -
|
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_ORG= RUNNER_MIN_REPLICAS=${REPO_RUNNER_MIN_REPLICAS} NAME=repo-runnerdeploy envsubst | kubectl apply -f -
|
||||||
cat acceptance/testdata/repo.hra.yaml | envsubst | kubectl apply -f -
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo 'Skipped deploying runnerdeployment and hra. Set TEST_REPO to "yourorg/yourrepo" to deploy.'
|
echo 'Skipped deploying runnerdeployment and hra. Set TEST_REPO to "yourorg/yourrepo" to deploy.'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "${TEST_ORG}" ]; then
|
if [ -n "${TEST_ORG}" ]; then
|
||||||
cat acceptance/testdata/org.runnerdeploy.yaml | envsubst | kubectl apply -f -
|
if [ "${USE_RUNNERSET}" != "false" ]; then
|
||||||
|
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} NAME=org-runnerset envsubst | kubectl apply -f -
|
||||||
if [ -n "${TEST_ORG_REPO}" ]; then
|
|
||||||
cat acceptance/testdata/org.hra.yaml | envsubst | kubectl apply -f -
|
|
||||||
else
|
else
|
||||||
echo 'Skipped deploying organizational hra. Set TEST_ORG_REPO to "yourorg/yourrepo" to deploy.'
|
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} NAME=org-runnerdeploy envsubst | kubectl apply -f -
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "${TEST_ORG_GROUP}" ]; then
|
||||||
|
if [ "${USE_RUNNERSET}" != "false" ]; then
|
||||||
|
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} TEST_GROUP=${TEST_ORG_GROUP} NAME=orggroup-runnerset envsubst | kubectl apply -f -
|
||||||
|
else
|
||||||
|
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} TEST_GROUP=${TEST_ORG_GROUP} NAME=orggroup-runnerdeploy envsubst | kubectl apply -f -
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo 'Skipped deploying enterprise runnerdeployment. Set TEST_ORG_GROUP to deploy.'
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo 'Skipped deploying organizational runnerdeployment. Set TEST_ORG to deploy.'
|
echo 'Skipped deploying organizational runnerdeployment. Set TEST_ORG to deploy.'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "${TEST_ENTERPRISE}" ]; then
|
||||||
|
if [ "${USE_RUNNERSET}" != "false" ]; then
|
||||||
|
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ORG= TEST_REPO= RUNNER_MIN_REPLICAS=${ENTERPRISE_RUNNER_MIN_REPLICAS} NAME=enterprise-runnerset envsubst | kubectl apply -f -
|
||||||
|
else
|
||||||
|
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ORG= TEST_REPO= RUNNER_MIN_REPLICAS=${ENTERPRISE_RUNNER_MIN_REPLICAS} NAME=enterprise-runnerdeploy envsubst | kubectl apply -f -
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "${TEST_ENTERPRISE_GROUP}" ]; then
|
||||||
|
if [ "${USE_RUNNERSET}" != "false" ]; then
|
||||||
|
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ORG= TEST_REPO= RUNNER_MIN_REPLICAS=${ENTERPRISE_RUNNER_MIN_REPLICAS} TEST_GROUP=${TEST_ENTERPRISE_GROUP} NAME=enterprisegroup-runnerset envsubst | kubectl apply -f -
|
||||||
|
else
|
||||||
|
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ORG= TEST_REPO= RUNNER_MIN_REPLICAS=${ENTERPRISE_RUNNER_MIN_REPLICAS} TEST_GROUP=${TEST_ENTERPRISE_GROUP} NAME=enterprisegroup-runnerdeploy envsubst | kubectl apply -f -
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo 'Skipped deploying enterprise runnerdeployment. Set TEST_ENTERPRISE_GROUP to deploy.'
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo 'Skipped deploying enterprise runnerdeployment. Set TEST_ENTERPRISE to deploy.'
|
||||||
|
fi
|
||||||
|
|||||||
36
acceptance/testdata/org.hra.yaml
vendored
36
acceptance/testdata/org.hra.yaml
vendored
@@ -1,36 +0,0 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: HorizontalRunnerAutoscaler
|
|
||||||
metadata:
|
|
||||||
name: org
|
|
||||||
spec:
|
|
||||||
scaleTargetRef:
|
|
||||||
name: org-runnerdeploy
|
|
||||||
scaleUpTriggers:
|
|
||||||
- githubEvent:
|
|
||||||
checkRun:
|
|
||||||
types: ["created"]
|
|
||||||
status: "queued"
|
|
||||||
amount: 1
|
|
||||||
duration: "1m"
|
|
||||||
scheduledOverrides:
|
|
||||||
- startTime: "2021-05-11T16:05:00+09:00"
|
|
||||||
endTime: "2021-05-11T16:40:00+09:00"
|
|
||||||
minReplicas: 2
|
|
||||||
- startTime: "2021-05-01T00:00:00+09:00"
|
|
||||||
endTime: "2021-05-03T00:00:00+09:00"
|
|
||||||
recurrenceRule:
|
|
||||||
frequency: Weekly
|
|
||||||
untilTime: "2022-05-01T00:00:00+09:00"
|
|
||||||
minReplicas: 0
|
|
||||||
minReplicas: 0
|
|
||||||
maxReplicas: 5
|
|
||||||
# Used to test that HRA is working for org runners
|
|
||||||
metrics:
|
|
||||||
- type: PercentageRunnersBusy
|
|
||||||
scaleUpThreshold: '0.75'
|
|
||||||
scaleDownThreshold: '0.3'
|
|
||||||
scaleUpFactor: '2'
|
|
||||||
scaleDownFactor: '0.5'
|
|
||||||
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
|
|
||||||
repositoryNames:
|
|
||||||
- ${TEST_ORG_REPO}
|
|
||||||
25
acceptance/testdata/repo.hra.yaml
vendored
25
acceptance/testdata/repo.hra.yaml
vendored
@@ -1,25 +0,0 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: HorizontalRunnerAutoscaler
|
|
||||||
metadata:
|
|
||||||
name: actions-runner-aos-autoscaler
|
|
||||||
spec:
|
|
||||||
scaleTargetRef:
|
|
||||||
name: example-runnerdeploy
|
|
||||||
scaleUpTriggers:
|
|
||||||
- githubEvent:
|
|
||||||
checkRun:
|
|
||||||
types: ["created"]
|
|
||||||
status: "queued"
|
|
||||||
amount: 1
|
|
||||||
duration: "1m"
|
|
||||||
minReplicas: 0
|
|
||||||
maxReplicas: 5
|
|
||||||
metrics:
|
|
||||||
- type: PercentageRunnersBusy
|
|
||||||
scaleUpThreshold: '0.75'
|
|
||||||
scaleDownThreshold: '0.3'
|
|
||||||
scaleUpFactor: '2'
|
|
||||||
scaleDownFactor: '0.5'
|
|
||||||
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
|
|
||||||
repositoryNames:
|
|
||||||
- ${TEST_REPO}
|
|
||||||
37
acceptance/testdata/repo.runnerdeploy.yaml
vendored
37
acceptance/testdata/repo.runnerdeploy.yaml
vendored
@@ -1,37 +0,0 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: RunnerDeployment
|
|
||||||
metadata:
|
|
||||||
name: example-runnerdeploy
|
|
||||||
spec:
|
|
||||||
# replicas: 1
|
|
||||||
template:
|
|
||||||
spec:
|
|
||||||
repository: ${TEST_REPO}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Custom runner image
|
|
||||||
#
|
|
||||||
image: ${RUNNER_NAME}:${RUNNER_TAG}
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
|
|
||||||
#
|
|
||||||
# dockerd within runner container
|
|
||||||
#
|
|
||||||
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
|
|
||||||
#dockerdWithinRunnerContainer: true
|
|
||||||
#image: mumoshu/actions-runner-dind:dev
|
|
||||||
|
|
||||||
#
|
|
||||||
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
|
|
||||||
#
|
|
||||||
#dockerMTU: 1450
|
|
||||||
|
|
||||||
#Runner group
|
|
||||||
# labels:
|
|
||||||
# - "mylabel 1"
|
|
||||||
# - "mylabel 2"
|
|
||||||
|
|
||||||
#
|
|
||||||
# Non-standard working directory
|
|
||||||
#
|
|
||||||
# workDir: "/"
|
|
||||||
29
acceptance/testdata/repo.runnerset.hra.yaml
vendored
29
acceptance/testdata/repo.runnerset.hra.yaml
vendored
@@ -1,29 +0,0 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: HorizontalRunnerAutoscaler
|
|
||||||
metadata:
|
|
||||||
name: example-runnerset
|
|
||||||
spec:
|
|
||||||
scaleTargetRef:
|
|
||||||
kind: RunnerSet
|
|
||||||
name: example-runnerset
|
|
||||||
scaleUpTriggers:
|
|
||||||
- githubEvent:
|
|
||||||
checkRun:
|
|
||||||
types: ["created"]
|
|
||||||
status: "queued"
|
|
||||||
amount: 1
|
|
||||||
duration: "1m"
|
|
||||||
# RunnerSet doesn't support scale from/to zero yet
|
|
||||||
minReplicas: 1
|
|
||||||
maxReplicas: 5
|
|
||||||
# This should be less than 600(seconds, the default) for faster testing
|
|
||||||
scaleDownDelaySecondsAfterScaleOut: 60
|
|
||||||
metrics:
|
|
||||||
- type: PercentageRunnersBusy
|
|
||||||
scaleUpThreshold: '0.75'
|
|
||||||
scaleDownThreshold: '0.3'
|
|
||||||
scaleUpFactor: '2'
|
|
||||||
scaleDownFactor: '0.5'
|
|
||||||
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
|
|
||||||
repositoryNames:
|
|
||||||
- ${TEST_REPO}
|
|
||||||
59
acceptance/testdata/repo.runnerset.yaml
vendored
59
acceptance/testdata/repo.runnerset.yaml
vendored
@@ -1,59 +0,0 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
|
||||||
kind: RunnerSet
|
|
||||||
metadata:
|
|
||||||
name: example-runnerset
|
|
||||||
spec:
|
|
||||||
# MANDATORY because it is based on StatefulSet: Results in a below error when omitted:
|
|
||||||
# missing required field "selector" in dev.summerwind.actions.v1alpha1.RunnerSet.spec
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: example-runnerset
|
|
||||||
|
|
||||||
# MANDATORY because it is based on StatefulSet: Results in a below error when omitted:
|
|
||||||
# missing required field "serviceName" in dev.summerwind.actions.v1alpha1.RunnerSet.spec]
|
|
||||||
serviceName: example-runnerset
|
|
||||||
|
|
||||||
#replicas: 1
|
|
||||||
|
|
||||||
# From my limited testing, `ephemeral: true` is more reliable.
|
|
||||||
# Seomtimes, updating already deployed runners from `ephemeral: false` to `ephemeral: true` seems to
|
|
||||||
# result in queued jobs hanging forever.
|
|
||||||
ephemeral: ${TEST_EPHEMERAL}
|
|
||||||
|
|
||||||
repository: ${TEST_REPO}
|
|
||||||
#
|
|
||||||
# Custom runner image
|
|
||||||
#
|
|
||||||
image: ${RUNNER_NAME}:${RUNNER_TAG}
|
|
||||||
#
|
|
||||||
# dockerd within runner container
|
|
||||||
#
|
|
||||||
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
|
|
||||||
#dockerdWithinRunnerContainer: true
|
|
||||||
#
|
|
||||||
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
|
|
||||||
#
|
|
||||||
#dockerMTU: 1450
|
|
||||||
#Runner group
|
|
||||||
# labels:
|
|
||||||
# - "mylabel 1"
|
|
||||||
# - "mylabel 2"
|
|
||||||
labels:
|
|
||||||
- "${RUNNER_LABEL}"
|
|
||||||
#
|
|
||||||
# Non-standard working directory
|
|
||||||
#
|
|
||||||
# workDir: "/"
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: example-runnerset
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: runner
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
env:
|
|
||||||
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
|
|
||||||
value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
|
|
||||||
#- name: docker
|
|
||||||
# #image: mumoshu/actions-runner-dind:dev
|
|
||||||
@@ -1,12 +1,15 @@
|
|||||||
apiVersion: actions.summerwind.dev/v1alpha1
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
kind: RunnerDeployment
|
kind: RunnerDeployment
|
||||||
metadata:
|
metadata:
|
||||||
name: org-runnerdeploy
|
name: ${NAME}
|
||||||
spec:
|
spec:
|
||||||
# replicas: 1
|
# replicas: 1
|
||||||
template:
|
template:
|
||||||
spec:
|
spec:
|
||||||
|
enterprise: ${TEST_ENTERPRISE}
|
||||||
|
group: ${TEST_GROUP}
|
||||||
organization: ${TEST_ORG}
|
organization: ${TEST_ORG}
|
||||||
|
repository: ${TEST_REPO}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Custom runner image
|
# Custom runner image
|
||||||
@@ -14,12 +17,15 @@ spec:
|
|||||||
image: ${RUNNER_NAME}:${RUNNER_TAG}
|
image: ${RUNNER_NAME}:${RUNNER_TAG}
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
ephemeral: ${TEST_EPHEMERAL}
|
||||||
|
|
||||||
#
|
#
|
||||||
# dockerd within runner container
|
# dockerd within runner container
|
||||||
#
|
#
|
||||||
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
|
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
|
||||||
#dockerdWithinRunnerContainer: true
|
#dockerdWithinRunnerContainer: true
|
||||||
#image: mumoshu/actions-runner-dind:dev
|
#image: mumoshu/actions-runner-dind:dev
|
||||||
|
dockerdWithinRunnerContainer: ${RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
|
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
|
||||||
@@ -30,8 +36,26 @@ spec:
|
|||||||
# labels:
|
# labels:
|
||||||
# - "mylabel 1"
|
# - "mylabel 1"
|
||||||
# - "mylabel 2"
|
# - "mylabel 2"
|
||||||
|
labels:
|
||||||
|
- "${RUNNER_LABEL}"
|
||||||
|
|
||||||
#
|
#
|
||||||
# Non-standard working directory
|
# Non-standard working directory
|
||||||
#
|
#
|
||||||
# workDir: "/"
|
# workDir: "/"
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: HorizontalRunnerAutoscaler
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}
|
||||||
|
spec:
|
||||||
|
scaleTargetRef:
|
||||||
|
name: ${NAME}
|
||||||
|
scaleUpTriggers:
|
||||||
|
- githubEvent:
|
||||||
|
workflowJob: {}
|
||||||
|
amount: 1
|
||||||
|
duration: "10m"
|
||||||
|
minReplicas: ${RUNNER_MIN_REPLICAS}
|
||||||
|
maxReplicas: 10
|
||||||
|
scaleDownDelaySecondsAfterScaleOut: ${RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT}
|
||||||
253
acceptance/testdata/runnerset.envsubst.yaml
vendored
Normal file
253
acceptance/testdata/runnerset.envsubst.yaml
vendored
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}-runner-work-dir
|
||||||
|
labels:
|
||||||
|
content: ${NAME}-runner-work-dir
|
||||||
|
provisioner: rancher.io/local-path
|
||||||
|
reclaimPolicy: Delete
|
||||||
|
volumeBindingMode: WaitForFirstConsumer
|
||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}
|
||||||
|
# In kind environments, the provider writes:
|
||||||
|
# /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner/PV_NAME
|
||||||
|
# It can be hundreds of gigabytes depending on what you cache in the test workflow. Beware to not encounter `no space left on device` errors!
|
||||||
|
# If you did encounter no space errorrs try:
|
||||||
|
# docker system prune
|
||||||
|
# docker buildx prune #=> frees up /var/lib/docker/volumes/buildx_buildkit_container-builder0_state
|
||||||
|
# sudo rm -rf /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner #=> frees up local-path-provisioner's data
|
||||||
|
provisioner: rancher.io/local-path
|
||||||
|
reclaimPolicy: Retain
|
||||||
|
volumeBindingMode: WaitForFirstConsumer
|
||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}-var-lib-docker
|
||||||
|
labels:
|
||||||
|
content: ${NAME}-var-lib-docker
|
||||||
|
provisioner: rancher.io/local-path
|
||||||
|
reclaimPolicy: Retain
|
||||||
|
volumeBindingMode: WaitForFirstConsumer
|
||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}-cache
|
||||||
|
labels:
|
||||||
|
content: ${NAME}-cache
|
||||||
|
provisioner: rancher.io/local-path
|
||||||
|
reclaimPolicy: Retain
|
||||||
|
volumeBindingMode: WaitForFirstConsumer
|
||||||
|
---
|
||||||
|
apiVersion: storage.k8s.io/v1
|
||||||
|
kind: StorageClass
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}-runner-tool-cache
|
||||||
|
labels:
|
||||||
|
content: ${NAME}-runner-tool-cache
|
||||||
|
provisioner: rancher.io/local-path
|
||||||
|
reclaimPolicy: Retain
|
||||||
|
volumeBindingMode: WaitForFirstConsumer
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerSet
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}
|
||||||
|
spec:
|
||||||
|
# MANDATORY because it is based on StatefulSet: Results in a below error when omitted:
|
||||||
|
# missing required field "selector" in dev.summerwind.actions.v1alpha1.RunnerSet.spec
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: ${NAME}
|
||||||
|
|
||||||
|
# MANDATORY because it is based on StatefulSet: Results in a below error when omitted:
|
||||||
|
# missing required field "serviceName" in dev.summerwind.actions.v1alpha1.RunnerSet.spec]
|
||||||
|
serviceName: ${NAME}
|
||||||
|
|
||||||
|
#replicas: 1
|
||||||
|
|
||||||
|
# From my limited testing, `ephemeral: true` is more reliable.
|
||||||
|
# Seomtimes, updating already deployed runners from `ephemeral: false` to `ephemeral: true` seems to
|
||||||
|
# result in queued jobs hanging forever.
|
||||||
|
ephemeral: ${TEST_EPHEMERAL}
|
||||||
|
|
||||||
|
enterprise: ${TEST_ENTERPRISE}
|
||||||
|
group: ${TEST_GROUP}
|
||||||
|
organization: ${TEST_ORG}
|
||||||
|
repository: ${TEST_REPO}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Custom runner image
|
||||||
|
#
|
||||||
|
image: ${RUNNER_NAME}:${RUNNER_TAG}
|
||||||
|
|
||||||
|
#
|
||||||
|
# dockerd within runner container
|
||||||
|
#
|
||||||
|
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
|
||||||
|
#dockerdWithinRunnerContainer: true
|
||||||
|
dockerdWithinRunnerContainer: ${RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
|
||||||
|
#
|
||||||
|
#dockerMTU: 1450
|
||||||
|
#Runner group
|
||||||
|
# labels:
|
||||||
|
# - "mylabel 1"
|
||||||
|
# - "mylabel 2"
|
||||||
|
labels:
|
||||||
|
- "${RUNNER_LABEL}"
|
||||||
|
#
|
||||||
|
# Non-standard working directory
|
||||||
|
#
|
||||||
|
# workDir: "/"
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ${NAME}
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
|
||||||
|
value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
|
||||||
|
- name: GOMODCACHE
|
||||||
|
value: "/home/runner/.cache/go-mod"
|
||||||
|
# PV-backed runner work dir
|
||||||
|
volumeMounts:
|
||||||
|
- name: work
|
||||||
|
mountPath: /runner/_work
|
||||||
|
# Cache docker image layers, in case dockerdWithinRunnerContainer=true
|
||||||
|
- name: var-lib-docker
|
||||||
|
mountPath: /var/lib/docker
|
||||||
|
# Cache go modules and builds
|
||||||
|
# - name: gocache
|
||||||
|
# # Run `goenv | grep GOCACHE` to verify the path is correct for your env
|
||||||
|
# mountPath: /home/runner/.cache/go-build
|
||||||
|
# - name: gomodcache
|
||||||
|
# # Run `goenv | grep GOMODCACHE` to verify the path is correct for your env
|
||||||
|
# # mountPath: /home/runner/go/pkg/mod
|
||||||
|
- name: cache
|
||||||
|
# go: could not create module cache: stat /home/runner/.cache/go-mod: permission denied
|
||||||
|
mountPath: "/home/runner/.cache"
|
||||||
|
- name: runner-tool-cache
|
||||||
|
# This corresponds to our runner image's default setting of RUNNER_TOOL_CACHE=/opt/hostedtoolcache.
|
||||||
|
#
|
||||||
|
# In case you customize the envvar in both runner and docker containers of the runner pod spec,
|
||||||
|
# You'd need to change this mountPath accordingly.
|
||||||
|
#
|
||||||
|
# The tool cache directory is defined in actions/toolkit's tool-cache module:
|
||||||
|
# https://github.com/actions/toolkit/blob/2f164000dcd42fb08287824a3bc3030dbed33687/packages/tool-cache/src/tool-cache.ts#L621-L638
|
||||||
|
#
|
||||||
|
# Many setup-* actions like setup-go utilizes the tool-cache module to download and cache installed binaries:
|
||||||
|
# https://github.com/actions/setup-go/blob/56a61c9834b4a4950dbbf4740af0b8a98c73b768/src/installer.ts#L144
|
||||||
|
mountPath: "/opt/hostedtoolcache"
|
||||||
|
# Valid only when dockerdWithinRunnerContainer=false
|
||||||
|
- name: docker
|
||||||
|
# PV-backed runner work dir
|
||||||
|
volumeMounts:
|
||||||
|
- name: work
|
||||||
|
mountPath: /runner/_work
|
||||||
|
# Cache docker image layers, in case dockerdWithinRunnerContainer=false
|
||||||
|
- name: var-lib-docker
|
||||||
|
mountPath: /var/lib/docker
|
||||||
|
# image: mumoshu/actions-runner-dind:dev
|
||||||
|
|
||||||
|
# For buildx cache
|
||||||
|
- name: cache
|
||||||
|
mountPath: "/home/runner/.cache"
|
||||||
|
volumes:
|
||||||
|
- name: work
|
||||||
|
ephemeral:
|
||||||
|
volumeClaimTemplate:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: "${NAME}-runner-work-dir"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: vol1
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: ${NAME}
|
||||||
|
## Dunno which provider supports auto-provisioning with selector.
|
||||||
|
## At least the rancher local path provider stopped with:
|
||||||
|
## waiting for a volume to be created, either by external provisioner "rancher.io/local-path" or manually created by system administrator
|
||||||
|
# selector:
|
||||||
|
# matchLabels:
|
||||||
|
# runnerset-volume-id: ${NAME}-vol1
|
||||||
|
- metadata:
|
||||||
|
name: vol2
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: ${NAME}
|
||||||
|
# selector:
|
||||||
|
# matchLabels:
|
||||||
|
# runnerset-volume-id: ${NAME}-vol2
|
||||||
|
- metadata:
|
||||||
|
name: var-lib-docker
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: ${NAME}-var-lib-docker
|
||||||
|
- metadata:
|
||||||
|
name: cache
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: ${NAME}-cache
|
||||||
|
- metadata:
|
||||||
|
name: runner-tool-cache
|
||||||
|
# It turns out labels doesn't distinguish PVs across PVCs and the
|
||||||
|
# end result is PVs are reused by wrong PVCs.
|
||||||
|
# The correct way seems to be to differentiate storage class per pvc template.
|
||||||
|
# labels:
|
||||||
|
# id: runner-tool-cache
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Mi
|
||||||
|
storageClassName: ${NAME}-runner-tool-cache
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: HorizontalRunnerAutoscaler
|
||||||
|
metadata:
|
||||||
|
name: ${NAME}
|
||||||
|
spec:
|
||||||
|
scaleTargetRef:
|
||||||
|
kind: RunnerSet
|
||||||
|
name: ${NAME}
|
||||||
|
scaleUpTriggers:
|
||||||
|
- githubEvent:
|
||||||
|
workflowJob: {}
|
||||||
|
amount: 1
|
||||||
|
duration: "10m"
|
||||||
|
minReplicas: ${RUNNER_MIN_REPLICAS}
|
||||||
|
maxReplicas: 10
|
||||||
|
scaleDownDelaySecondsAfterScaleOut: ${RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT}
|
||||||
@@ -1,12 +1,15 @@
|
|||||||
# Set actions-runner-controller settings for testing
|
# Set actions-runner-controller settings for testing
|
||||||
githubAPICacheDuration: 10s
|
logLevel: "-4"
|
||||||
githubWebhookServer:
|
githubWebhookServer:
|
||||||
|
logLevel: "-4"
|
||||||
enabled: true
|
enabled: true
|
||||||
labels: {}
|
labels: {}
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
syncPeriod: 10m
|
syncPeriod: 10m
|
||||||
|
useRunnerGroupsVisibility: true
|
||||||
secret:
|
secret:
|
||||||
create: true
|
enabled: true
|
||||||
|
# create: true
|
||||||
name: "github-webhook-server"
|
name: "github-webhook-server"
|
||||||
### GitHub Webhook Configuration
|
### GitHub Webhook Configuration
|
||||||
#github_webhook_secret_token: ""
|
#github_webhook_secret_token: ""
|
||||||
|
|||||||
@@ -72,10 +72,12 @@ type GitHubEventScaleUpTriggerSpec struct {
|
|||||||
CheckRun *CheckRunSpec `json:"checkRun,omitempty"`
|
CheckRun *CheckRunSpec `json:"checkRun,omitempty"`
|
||||||
PullRequest *PullRequestSpec `json:"pullRequest,omitempty"`
|
PullRequest *PullRequestSpec `json:"pullRequest,omitempty"`
|
||||||
Push *PushSpec `json:"push,omitempty"`
|
Push *PushSpec `json:"push,omitempty"`
|
||||||
|
WorkflowJob *WorkflowJobSpec `json:"workflowJob,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://docs.github.com/en/actions/reference/events-that-trigger-workflows#check_run
|
// https://docs.github.com/en/actions/reference/events-that-trigger-workflows#check_run
|
||||||
type CheckRunSpec struct {
|
type CheckRunSpec struct {
|
||||||
|
// One of: created, rerequested, or completed
|
||||||
Types []string `json:"types,omitempty"`
|
Types []string `json:"types,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
@@ -90,6 +92,10 @@ type CheckRunSpec struct {
|
|||||||
Repositories []string `json:"repositories,omitempty"`
|
Repositories []string `json:"repositories,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job
|
||||||
|
type WorkflowJobSpec struct {
|
||||||
|
}
|
||||||
|
|
||||||
// https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
|
// https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
|
||||||
type PullRequestSpec struct {
|
type PullRequestSpec struct {
|
||||||
Types []string `json:"types,omitempty"`
|
Types []string `json:"types,omitempty"`
|
||||||
@@ -107,6 +113,9 @@ type CapacityReservation struct {
|
|||||||
Name string `json:"name,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
ExpirationTime metav1.Time `json:"expirationTime,omitempty"`
|
ExpirationTime metav1.Time `json:"expirationTime,omitempty"`
|
||||||
Replicas int `json:"replicas,omitempty"`
|
Replicas int `json:"replicas,omitempty"`
|
||||||
|
|
||||||
|
// +optional
|
||||||
|
EffectiveTime metav1.Time `json:"effectiveTime,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ScaleTargetRef struct {
|
type ScaleTargetRef struct {
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ type RunnerPodSpec struct {
|
|||||||
HostAliases []corev1.HostAlias `json:"hostAliases,omitempty"`
|
HostAliases []corev1.HostAlias `json:"hostAliases,omitempty"`
|
||||||
|
|
||||||
// +optional
|
// +optional
|
||||||
TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraint,omitempty"`
|
TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
|
||||||
|
|
||||||
// RuntimeClassName is the container runtime configuration that containers should run under.
|
// RuntimeClassName is the container runtime configuration that containers should run under.
|
||||||
// More info: https://kubernetes.io/docs/concepts/containers/runtime-class
|
// More info: https://kubernetes.io/docs/concepts/containers/runtime-class
|
||||||
@@ -153,7 +153,7 @@ type RunnerPodSpec struct {
|
|||||||
RuntimeClassName *string `json:"runtimeClassName,omitempty"`
|
RuntimeClassName *string `json:"runtimeClassName,omitempty"`
|
||||||
|
|
||||||
// +optional
|
// +optional
|
||||||
DnsConfig []corev1.PodDNSConfig `json:"dnsConfig,omitempty"`
|
DnsConfig *corev1.PodDNSConfig `json:"dnsConfig,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ValidateRepository validates repository field.
|
// ValidateRepository validates repository field.
|
||||||
@@ -181,6 +181,9 @@ func (rs *RunnerSpec) ValidateRepository() error {
|
|||||||
|
|
||||||
// RunnerStatus defines the observed state of Runner
|
// RunnerStatus defines the observed state of Runner
|
||||||
type RunnerStatus struct {
|
type RunnerStatus struct {
|
||||||
|
// Turns true only if the runner pod is ready.
|
||||||
|
// +optional
|
||||||
|
Ready bool `json:"ready"`
|
||||||
// +optional
|
// +optional
|
||||||
Registration RunnerStatusRegistration `json:"registration"`
|
Registration RunnerStatusRegistration `json:"registration"`
|
||||||
// +optional
|
// +optional
|
||||||
|
|||||||
@@ -31,6 +31,14 @@ type RunnerDeploymentSpec struct {
|
|||||||
// +nullable
|
// +nullable
|
||||||
Replicas *int `json:"replicas,omitempty"`
|
Replicas *int `json:"replicas,omitempty"`
|
||||||
|
|
||||||
|
// EffectiveTime is the time the upstream controller requested to sync Replicas.
|
||||||
|
// It is usually populated by the webhook-based autoscaler via HRA.
|
||||||
|
// The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
//
|
||||||
|
// +optional
|
||||||
|
// +nullable
|
||||||
|
EffectiveTime *metav1.Time `json:"effectiveTime"`
|
||||||
|
|
||||||
// +optional
|
// +optional
|
||||||
// +nullable
|
// +nullable
|
||||||
Selector *metav1.LabelSelector `json:"selector"`
|
Selector *metav1.LabelSelector `json:"selector"`
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// log is for logging in this package.
|
// log is for logging in this package.
|
||||||
var runenrDeploymentLog = logf.Log.WithName("runnerdeployment-resource")
|
var runnerDeploymentLog = logf.Log.WithName("runnerdeployment-resource")
|
||||||
|
|
||||||
func (r *RunnerDeployment) SetupWebhookWithManager(mgr ctrl.Manager) error {
|
func (r *RunnerDeployment) SetupWebhookWithManager(mgr ctrl.Manager) error {
|
||||||
return ctrl.NewWebhookManagedBy(mgr).
|
return ctrl.NewWebhookManagedBy(mgr).
|
||||||
@@ -49,13 +49,13 @@ var _ webhook.Validator = &RunnerDeployment{}
|
|||||||
|
|
||||||
// ValidateCreate implements webhook.Validator so a webhook will be registered for the type
|
// ValidateCreate implements webhook.Validator so a webhook will be registered for the type
|
||||||
func (r *RunnerDeployment) ValidateCreate() error {
|
func (r *RunnerDeployment) ValidateCreate() error {
|
||||||
runenrDeploymentLog.Info("validate resource to be created", "name", r.Name)
|
runnerDeploymentLog.Info("validate resource to be created", "name", r.Name)
|
||||||
return r.Validate()
|
return r.Validate()
|
||||||
}
|
}
|
||||||
|
|
||||||
// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
|
// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
|
||||||
func (r *RunnerDeployment) ValidateUpdate(old runtime.Object) error {
|
func (r *RunnerDeployment) ValidateUpdate(old runtime.Object) error {
|
||||||
runenrDeploymentLog.Info("validate resource to be updated", "name", r.Name)
|
runnerDeploymentLog.Info("validate resource to be updated", "name", r.Name)
|
||||||
return r.Validate()
|
return r.Validate()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,15 @@ type RunnerReplicaSetSpec struct {
|
|||||||
// +nullable
|
// +nullable
|
||||||
Replicas *int `json:"replicas,omitempty"`
|
Replicas *int `json:"replicas,omitempty"`
|
||||||
|
|
||||||
|
// EffectiveTime is the time the upstream controller requested to sync Replicas.
|
||||||
|
// It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment.
|
||||||
|
// The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners
|
||||||
|
// based on potentially outdated Replicas value.
|
||||||
|
//
|
||||||
|
// +optional
|
||||||
|
// +nullable
|
||||||
|
EffectiveTime *metav1.Time `json:"effectiveTime"`
|
||||||
|
|
||||||
// +optional
|
// +optional
|
||||||
// +nullable
|
// +nullable
|
||||||
Selector *metav1.LabelSelector `json:"selector"`
|
Selector *metav1.LabelSelector `json:"selector"`
|
||||||
|
|||||||
@@ -25,6 +25,14 @@ import (
|
|||||||
type RunnerSetSpec struct {
|
type RunnerSetSpec struct {
|
||||||
RunnerConfig `json:",inline"`
|
RunnerConfig `json:",inline"`
|
||||||
|
|
||||||
|
// EffectiveTime is the time the upstream controller requested to sync Replicas.
|
||||||
|
// It is usually populated by the webhook-based autoscaler via HRA.
|
||||||
|
// It is used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
//
|
||||||
|
// +optional
|
||||||
|
// +nullable
|
||||||
|
EffectiveTime *metav1.Time `json:"effectiveTime,omitempty"`
|
||||||
|
|
||||||
appsv1.StatefulSetSpec `json:",inline"`
|
appsv1.StatefulSetSpec `json:",inline"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ func (in *CacheEntry) DeepCopy() *CacheEntry {
|
|||||||
func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) {
|
func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) {
|
||||||
*out = *in
|
*out = *in
|
||||||
in.ExpirationTime.DeepCopyInto(&out.ExpirationTime)
|
in.ExpirationTime.DeepCopyInto(&out.ExpirationTime)
|
||||||
|
in.EffectiveTime.DeepCopyInto(&out.EffectiveTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation.
|
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation.
|
||||||
@@ -107,6 +108,11 @@ func (in *GitHubEventScaleUpTriggerSpec) DeepCopyInto(out *GitHubEventScaleUpTri
|
|||||||
*out = new(PushSpec)
|
*out = new(PushSpec)
|
||||||
**out = **in
|
**out = **in
|
||||||
}
|
}
|
||||||
|
if in.WorkflowJob != nil {
|
||||||
|
in, out := &in.WorkflowJob, &out.WorkflowJob
|
||||||
|
*out = new(WorkflowJobSpec)
|
||||||
|
**out = **in
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitHubEventScaleUpTriggerSpec.
|
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitHubEventScaleUpTriggerSpec.
|
||||||
@@ -498,6 +504,10 @@ func (in *RunnerDeploymentSpec) DeepCopyInto(out *RunnerDeploymentSpec) {
|
|||||||
*out = new(int)
|
*out = new(int)
|
||||||
**out = **in
|
**out = **in
|
||||||
}
|
}
|
||||||
|
if in.EffectiveTime != nil {
|
||||||
|
in, out := &in.EffectiveTime, &out.EffectiveTime
|
||||||
|
*out = (*in).DeepCopy()
|
||||||
|
}
|
||||||
if in.Selector != nil {
|
if in.Selector != nil {
|
||||||
in, out := &in.Selector, &out.Selector
|
in, out := &in.Selector, &out.Selector
|
||||||
*out = new(metav1.LabelSelector)
|
*out = new(metav1.LabelSelector)
|
||||||
@@ -728,10 +738,8 @@ func (in *RunnerPodSpec) DeepCopyInto(out *RunnerPodSpec) {
|
|||||||
}
|
}
|
||||||
if in.DnsConfig != nil {
|
if in.DnsConfig != nil {
|
||||||
in, out := &in.DnsConfig, &out.DnsConfig
|
in, out := &in.DnsConfig, &out.DnsConfig
|
||||||
*out = make([]v1.PodDNSConfig, len(*in))
|
*out = new(v1.PodDNSConfig)
|
||||||
for i := range *in {
|
(*in).DeepCopyInto(*out)
|
||||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -812,6 +820,10 @@ func (in *RunnerReplicaSetSpec) DeepCopyInto(out *RunnerReplicaSetSpec) {
|
|||||||
*out = new(int)
|
*out = new(int)
|
||||||
**out = **in
|
**out = **in
|
||||||
}
|
}
|
||||||
|
if in.EffectiveTime != nil {
|
||||||
|
in, out := &in.EffectiveTime, &out.EffectiveTime
|
||||||
|
*out = (*in).DeepCopy()
|
||||||
|
}
|
||||||
if in.Selector != nil {
|
if in.Selector != nil {
|
||||||
in, out := &in.Selector, &out.Selector
|
in, out := &in.Selector, &out.Selector
|
||||||
*out = new(metav1.LabelSelector)
|
*out = new(metav1.LabelSelector)
|
||||||
@@ -923,6 +935,10 @@ func (in *RunnerSetList) DeepCopyObject() runtime.Object {
|
|||||||
func (in *RunnerSetSpec) DeepCopyInto(out *RunnerSetSpec) {
|
func (in *RunnerSetSpec) DeepCopyInto(out *RunnerSetSpec) {
|
||||||
*out = *in
|
*out = *in
|
||||||
in.RunnerConfig.DeepCopyInto(&out.RunnerConfig)
|
in.RunnerConfig.DeepCopyInto(&out.RunnerConfig)
|
||||||
|
if in.EffectiveTime != nil {
|
||||||
|
in, out := &in.EffectiveTime, &out.EffectiveTime
|
||||||
|
*out = (*in).DeepCopy()
|
||||||
|
}
|
||||||
in.StatefulSetSpec.DeepCopyInto(&out.StatefulSetSpec)
|
in.StatefulSetSpec.DeepCopyInto(&out.StatefulSetSpec)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1109,3 +1125,18 @@ func (in *ScheduledOverride) DeepCopy() *ScheduledOverride {
|
|||||||
in.DeepCopyInto(out)
|
in.DeepCopyInto(out)
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||||
|
func (in *WorkflowJobSpec) DeepCopyInto(out *WorkflowJobSpec) {
|
||||||
|
*out = *in
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkflowJobSpec.
|
||||||
|
func (in *WorkflowJobSpec) DeepCopy() *WorkflowJobSpec {
|
||||||
|
if in == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
out := new(WorkflowJobSpec)
|
||||||
|
in.DeepCopyInto(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|||||||
@@ -15,10 +15,10 @@ type: application
|
|||||||
# This is the chart version. This version number should be incremented each time you make changes
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
# to the chart and its templates, including the app version.
|
# to the chart and its templates, including the app version.
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
version: 0.15.3
|
version: 0.18.0
|
||||||
|
|
||||||
# Used as the default manager tag value when no tag property is provided in the values.yaml
|
# Used as the default manager tag value when no tag property is provided in the values.yaml
|
||||||
appVersion: 0.20.4
|
appVersion: 0.23.0
|
||||||
|
|
||||||
home: https://github.com/actions-runner-controller/actions-runner-controller
|
home: https://github.com/actions-runner-controller/actions-runner-controller
|
||||||
|
|
||||||
|
|||||||
@@ -4,18 +4,18 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
|
|||||||
|
|
||||||
## Values
|
## Values
|
||||||
|
|
||||||
**_The values are documented as of HEAD, to review the configuration options for your chart version ensure you view this file at the relevent [tag](https://github.com/actions-runner-controller/actions-runner-controller/tags)_**
|
**_The values are documented as of HEAD, to review the configuration options for your chart version ensure you view this file at the relevant [tag](https://github.com/actions-runner-controller/actions-runner-controller/tags)_**
|
||||||
|
|
||||||
> _Default values are the defaults set in the charts values.yaml, some properties have default configurations in the code for when the property is omitted or invalid_
|
> _Default values are the defaults set in the charts `values.yaml`, some properties have default configurations in the code for when the property is omitted or invalid_
|
||||||
|
|
||||||
| Key | Description | Default |
|
| Key | Description | Default |
|
||||||
|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------|
|
|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------|
|
||||||
| `labels` | Set labels to apply to all resources in the chart | |
|
| `labels` | Set labels to apply to all resources in the chart | |
|
||||||
| `replicaCount` | Set the number of controller pods | 1 |
|
| `replicaCount` | Set the number of controller pods | 1 |
|
||||||
|
| `webhookPort` | Set the containerPort for the webhook Pod | 9443 |
|
||||||
| `syncPeriod` | Set the period in which the controler reconciles the desired runners count | 10m |
|
| `syncPeriod` | Set the period in which the controler reconciles the desired runners count | 10m |
|
||||||
| `enableLeaderElection` | Enable election configuration | true |
|
| `enableLeaderElection` | Enable election configuration | true |
|
||||||
| `leaderElectionId` | Set the election ID for the controller group | |
|
| `leaderElectionId` | Set the election ID for the controller group | |
|
||||||
| `githubAPICacheDuration` | Set the cache period for API calls | |
|
|
||||||
| `githubEnterpriseServerURL` | Set the URL for a self-hosted GitHub Enterprise Server | |
|
| `githubEnterpriseServerURL` | Set the URL for a self-hosted GitHub Enterprise Server | |
|
||||||
| `githubURL` | Override GitHub URL to be used for GitHub API calls | |
|
| `githubURL` | Override GitHub URL to be used for GitHub API calls | |
|
||||||
| `githubUploadURL` | Override GitHub Upload URL to be used for GitHub API calls | |
|
| `githubUploadURL` | Override GitHub Upload URL to be used for GitHub API calls | |
|
||||||
@@ -33,6 +33,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
|
|||||||
| `authSecret.github_basicauth_username` | Username for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
|
| `authSecret.github_basicauth_username` | Username for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
|
||||||
| `authSecret.github_basicauth_password` | Password for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
|
| `authSecret.github_basicauth_password` | Password for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
|
||||||
| `dockerRegistryMirror` | The default Docker Registry Mirror used by runners. | |
|
| `dockerRegistryMirror` | The default Docker Registry Mirror used by runners. | |
|
||||||
|
| `hostNetwork` | The "hostNetwork" of the controller container | false |
|
||||||
| `image.repository` | The "repository/image" of the controller container | summerwind/actions-runner-controller |
|
| `image.repository` | The "repository/image" of the controller container | summerwind/actions-runner-controller |
|
||||||
| `image.tag` | The tag of the controller container | |
|
| `image.tag` | The tag of the controller container | |
|
||||||
| `image.actionsRunnerRepositoryAndTag` | The "repository/image" of the actions runner container | summerwind/actions-runner:latest |
|
| `image.actionsRunnerRepositoryAndTag` | The "repository/image" of the actions runner container | summerwind/actions-runner:latest |
|
||||||
@@ -49,7 +50,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
|
|||||||
| `imagePullSecrets` | Specifies the secret to be used when pulling the controller pod containers | |
|
| `imagePullSecrets` | Specifies the secret to be used when pulling the controller pod containers | |
|
||||||
| `fullnameOverride` | Override the full resource names | |
|
| `fullnameOverride` | Override the full resource names | |
|
||||||
| `nameOverride` | Override the resource name prefix | |
|
| `nameOverride` | Override the resource name prefix | |
|
||||||
| `serviceAccont.annotations` | Set annotations to the service account | |
|
| `serviceAccount.annotations` | Set annotations to the service account | |
|
||||||
| `serviceAccount.create` | Deploy the controller pod under a service account | true |
|
| `serviceAccount.create` | Deploy the controller pod under a service account | true |
|
||||||
| `podAnnotations` | Set annotations for the controller pod | |
|
| `podAnnotations` | Set annotations for the controller pod | |
|
||||||
| `podLabels` | Set labels for the controller pod | |
|
| `podLabels` | Set labels for the controller pod | |
|
||||||
@@ -75,8 +76,10 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
|
|||||||
| `admissionWebHooks.caBundle` | Base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate | |
|
| `admissionWebHooks.caBundle` | Base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate | |
|
||||||
| `githubWebhookServer.logLevel` | Set the log level of the githubWebhookServer container | |
|
| `githubWebhookServer.logLevel` | Set the log level of the githubWebhookServer container | |
|
||||||
| `githubWebhookServer.replicaCount` | Set the number of webhook server pods | 1 |
|
| `githubWebhookServer.replicaCount` | Set the number of webhook server pods | 1 |
|
||||||
|
| `githubWebhookServer.useRunnerGroupsVisibility` | Enable supporting runner groups with custom visibility. This will incur in extra API calls and may blow up your budget. Currently, you also need to set `githubWebhookServer.secret.enabled` to enable this feature. | false |
|
||||||
| `githubWebhookServer.syncPeriod` | Set the period in which the controller reconciles the resources | 10m |
|
| `githubWebhookServer.syncPeriod` | Set the period in which the controller reconciles the resources | 10m |
|
||||||
| `githubWebhookServer.enabled` | Deploy the webhook server pod | false |
|
| `githubWebhookServer.enabled` | Deploy the webhook server pod | false |
|
||||||
|
| `githubWebhookServer.secret.enabled` | Passes the webhook hook secret to the github-webhook-server | false |
|
||||||
| `githubWebhookServer.secret.create` | Deploy the webhook hook secret | false |
|
| `githubWebhookServer.secret.create` | Deploy the webhook hook secret | false |
|
||||||
| `githubWebhookServer.secret.name` | Set the name of the webhook hook secret | github-webhook-server |
|
| `githubWebhookServer.secret.name` | Set the name of the webhook hook secret | github-webhook-server |
|
||||||
| `githubWebhookServer.secret.github_webhook_secret_token` | Set the webhook secret token value | |
|
| `githubWebhookServer.secret.github_webhook_secret_token` | Set the webhook secret token value | |
|
||||||
|
|||||||
@@ -49,6 +49,9 @@ spec:
|
|||||||
items:
|
items:
|
||||||
description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
|
description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
expirationTime:
|
expirationTime:
|
||||||
format: date-time
|
format: date-time
|
||||||
type: string
|
type: string
|
||||||
@@ -138,6 +141,7 @@ spec:
|
|||||||
status:
|
status:
|
||||||
type: string
|
type: string
|
||||||
types:
|
types:
|
||||||
|
description: 'One of: created, rerequested, or completed'
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
@@ -157,6 +161,9 @@ spec:
|
|||||||
push:
|
push:
|
||||||
description: PushSpec is the condition for triggering scale-up on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
|
description: PushSpec is the condition for triggering scale-up on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
|
||||||
type: object
|
type: object
|
||||||
|
workflowJob:
|
||||||
|
description: https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job
|
||||||
|
type: object
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
|
|||||||
@@ -48,6 +48,11 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
|
description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
replicas:
|
replicas:
|
||||||
nullable: true
|
nullable: true
|
||||||
type: integer
|
type: integer
|
||||||
@@ -1349,33 +1354,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4152,7 +4155,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@@ -45,6 +45,11 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
|
description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners based on potentially outdated Replicas value.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
replicas:
|
replicas:
|
||||||
nullable: true
|
nullable: true
|
||||||
type: integer
|
type: integer
|
||||||
@@ -1346,33 +1351,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4149,7 +4152,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@@ -1292,33 +1292,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4095,7 +4093,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
@@ -5126,6 +5124,9 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
phase:
|
phase:
|
||||||
type: string
|
type: string
|
||||||
|
ready:
|
||||||
|
description: Turns true only if the runner pod is ready.
|
||||||
|
type: boolean
|
||||||
reason:
|
reason:
|
||||||
type: string
|
type: string
|
||||||
registration:
|
registration:
|
||||||
|
|||||||
@@ -55,6 +55,11 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
dockerdWithinRunnerContainer:
|
dockerdWithinRunnerContainer:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. It is used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
enterprise:
|
enterprise:
|
||||||
pattern: ^[^/]+$
|
pattern: ^[^/]+$
|
||||||
type: string
|
type: string
|
||||||
|
|||||||
@@ -18,20 +18,23 @@ Due to the above you can't just do a `helm upgrade` to release the latest versio
|
|||||||
|
|
||||||
## Steps
|
## Steps
|
||||||
|
|
||||||
1. Upgrade CRDs
|
1. Upgrade CRDs, this isn't optional, the CRDs you are using must be those that correspond with the version of the controller you are installing
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
# REMEMBER TO UPDATE THE CHART_VERSION TO RELEVANT CHART VERISON!!!!
|
# REMEMBER TO UPDATE THE CHART_VERSION TO RELEVANT CHART VERISON!!!!
|
||||||
CHART_VERSION=0.14.0
|
CHART_VERSION=0.18.0
|
||||||
|
|
||||||
curl -L https://github.com/actions-runner-controller/actions-runner-controller/releases/download/actions-runner-controller-${CHART_VERSION}/actions-runner-controller-${CHART_VERSION}.tgz | tar zxv --strip 1 actions-runner-controller/crds
|
curl -L https://github.com/actions-runner-controller/actions-runner-controller/releases/download/actions-runner-controller-${CHART_VERSION}/actions-runner-controller-${CHART_VERSION}.tgz | tar zxv --strip 1 actions-runner-controller/crds
|
||||||
|
|
||||||
kubectl apply -f crds/
|
kubectl replace -f crds/
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Upgrade the Helm release
|
2. Upgrade the Helm release
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
# helm repo [command]
|
||||||
|
helm repo update
|
||||||
|
|
||||||
# helm upgrade [RELEASE] [CHART] [flags]
|
# helm upgrade [RELEASE] [CHART] [flags]
|
||||||
helm upgrade actions-runner-controller \
|
helm upgrade actions-runner-controller \
|
||||||
actions-runner-controller/actions-runner-controller \
|
actions-runner-controller/actions-runner-controller \
|
||||||
|
|||||||
@@ -68,6 +68,10 @@ Create the name of the service account to use
|
|||||||
{{- default (include "actions-runner-controller.fullname" .) .Values.authSecret.name -}}
|
{{- default (include "actions-runner-controller.fullname" .) .Values.authSecret.name -}}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
|
{{- define "actions-runner-controller.githubWebhookServerSecretName" -}}
|
||||||
|
{{- default (include "actions-runner-controller.fullname" .) .Values.githubWebhookServer.secret.name -}}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{- define "actions-runner-controller.leaderElectionRoleName" -}}
|
{{- define "actions-runner-controller.leaderElectionRoleName" -}}
|
||||||
{{- include "actions-runner-controller.fullname" . }}-leader-election
|
{{- include "actions-runner-controller.fullname" . }}-leader-election
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ spec:
|
|||||||
metadata:
|
metadata:
|
||||||
{{- with .Values.podAnnotations }}
|
{{- with .Values.podAnnotations }}
|
||||||
annotations:
|
annotations:
|
||||||
|
kubectl.kubernetes.io/default-logs-container: "manager"
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
labels:
|
labels:
|
||||||
@@ -43,7 +44,9 @@ spec:
|
|||||||
{{- if .Values.leaderElectionId }}
|
{{- if .Values.leaderElectionId }}
|
||||||
- "--leader-election-id={{ .Values.leaderElectionId }}"
|
- "--leader-election-id={{ .Values.leaderElectionId }}"
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
- "--port={{ .Values.webhookPort }}"
|
||||||
- "--sync-period={{ .Values.syncPeriod }}"
|
- "--sync-period={{ .Values.syncPeriod }}"
|
||||||
|
- "--default-scale-down-delay={{ .Values.defaultScaleDownDelay }}"
|
||||||
- "--docker-image={{ .Values.image.dindSidecarRepositoryAndTag }}"
|
- "--docker-image={{ .Values.image.dindSidecarRepositoryAndTag }}"
|
||||||
- "--runner-image={{ .Values.image.actionsRunnerRepositoryAndTag }}"
|
- "--runner-image={{ .Values.image.actionsRunnerRepositoryAndTag }}"
|
||||||
{{- range .Values.image.actionsRunnerImagePullSecrets }}
|
{{- range .Values.image.actionsRunnerImagePullSecrets }}
|
||||||
@@ -104,17 +107,16 @@ spec:
|
|||||||
key: github_app_private_key
|
key: github_app_private_key
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.secretName" . }}
|
||||||
optional: true
|
optional: true
|
||||||
{{- if .Values.authSecret.github_basicauth_username }}
|
{{- if .Values.authSecret.github_basicauth_username }}
|
||||||
- name: GITHUB_BASICAUTH_USERNAME
|
- name: GITHUB_BASICAUTH_USERNAME
|
||||||
value: {{ .Values.authSecret.github_basicauth_username }}
|
value: {{ .Values.authSecret.github_basicauth_username }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .Values.authSecret.github_basicauth_password }}
|
|
||||||
- name: GITHUB_BASICAUTH_PASSWORD
|
- name: GITHUB_BASICAUTH_PASSWORD
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_basicauth_password
|
key: github_basicauth_password
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.secretName" . }}
|
||||||
{{- end }}
|
optional: true
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- range $key, $val := .Values.env }}
|
{{- range $key, $val := .Values.env }}
|
||||||
- name: {{ $key }}
|
- name: {{ $key }}
|
||||||
@@ -124,7 +126,7 @@ spec:
|
|||||||
name: manager
|
name: manager
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 9443
|
- containerPort: {{ .Values.webhookPort }}
|
||||||
name: webhook-server
|
name: webhook-server
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
{{- if not .Values.metrics.proxy.enabled }}
|
{{- if not .Values.metrics.proxy.enabled }}
|
||||||
@@ -199,3 +201,6 @@ spec:
|
|||||||
topologySpreadConstraints:
|
topologySpreadConstraints:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- if .Values.hostNetwork }}
|
||||||
|
hostNetwork: {{ .Values.hostNetwork }}
|
||||||
|
{{- end }}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ spec:
|
|||||||
metadata:
|
metadata:
|
||||||
{{- with .Values.githubWebhookServer.podAnnotations }}
|
{{- with .Values.githubWebhookServer.podAnnotations }}
|
||||||
annotations:
|
annotations:
|
||||||
|
kubectl.kubernetes.io/default-logs-container: "github-webhook-server"
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
labels:
|
labels:
|
||||||
@@ -69,42 +70,41 @@ spec:
|
|||||||
- name: GITHUB_UPLOAD_URL
|
- name: GITHUB_UPLOAD_URL
|
||||||
value: {{ .Values.githubUploadURL }}
|
value: {{ .Values.githubUploadURL }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .Values.authSecret.enabled }}
|
{{- if and .Values.githubWebhookServer.useRunnerGroupsVisibility .Values.githubWebhookServer.secret.enabled }}
|
||||||
- name: GITHUB_TOKEN
|
- name: GITHUB_TOKEN
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_token
|
key: github_token
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
|
||||||
optional: true
|
optional: true
|
||||||
- name: GITHUB_APP_ID
|
- name: GITHUB_APP_ID
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_app_id
|
key: github_app_id
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
|
||||||
optional: true
|
optional: true
|
||||||
- name: GITHUB_APP_INSTALLATION_ID
|
- name: GITHUB_APP_INSTALLATION_ID
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_app_installation_id
|
key: github_app_installation_id
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
|
||||||
optional: true
|
optional: true
|
||||||
- name: GITHUB_APP_PRIVATE_KEY
|
- name: GITHUB_APP_PRIVATE_KEY
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_app_private_key
|
key: github_app_private_key
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
|
||||||
optional: true
|
optional: true
|
||||||
{{- if .Values.authSecret.github_basicauth_username }}
|
{{- if .Values.authSecret.github_basicauth_username }}
|
||||||
- name: GITHUB_BASICAUTH_USERNAME
|
- name: GITHUB_BASICAUTH_USERNAME
|
||||||
value: {{ .Values.authSecret.github_basicauth_username }}
|
value: {{ .Values.authSecret.github_basicauth_username }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .Values.authSecret.github_basicauth_password }}
|
|
||||||
- name: GITHUB_BASICAUTH_PASSWORD
|
- name: GITHUB_BASICAUTH_PASSWORD
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
key: github_basicauth_password
|
key: github_basicauth_password
|
||||||
name: {{ include "actions-runner-controller.secretName" . }}
|
name: {{ include "actions-runner-controller.secretName" . }}
|
||||||
{{- end }}
|
optional: true
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- range $key, $val := .Values.githubWebhookServer.env }}
|
{{- range $key, $val := .Values.githubWebhookServer.env }}
|
||||||
- name: {{ $key }}
|
- name: {{ $key }}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ apiVersion: extensions/v1beta1
|
|||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ $fullName }}
|
name: {{ $fullName }}
|
||||||
|
namespace: {{ .Release.Namespace }}
|
||||||
labels:
|
labels:
|
||||||
{{- include "actions-runner-controller.labels" . | nindent 4 }}
|
{{- include "actions-runner-controller.labels" . | nindent 4 }}
|
||||||
{{- with .Values.githubWebhookServer.ingress.annotations }}
|
{{- with .Values.githubWebhookServer.ingress.annotations }}
|
||||||
@@ -36,6 +37,9 @@ spec:
|
|||||||
- host: {{ .host | quote }}
|
- host: {{ .host | quote }}
|
||||||
http:
|
http:
|
||||||
paths:
|
paths:
|
||||||
|
{{- if .extraPaths }}
|
||||||
|
{{- toYaml .extraPaths | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
{{- range .paths }}
|
{{- range .paths }}
|
||||||
- path: {{ .path }}
|
- path: {{ .path }}
|
||||||
{{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }}
|
{{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }}
|
||||||
|
|||||||
@@ -195,6 +195,28 @@ rules:
|
|||||||
verbs:
|
verbs:
|
||||||
- create
|
- create
|
||||||
- patch
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- persistentvolumeclaims
|
||||||
|
verbs:
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- persistentvolumes
|
||||||
|
verbs:
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- coordination.k8s.io
|
- coordination.k8s.io
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@@ -12,6 +12,11 @@ metadata:
|
|||||||
webhooks:
|
webhooks:
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ quote .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ quote .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -35,6 +40,11 @@ webhooks:
|
|||||||
sideEffects: None
|
sideEffects: None
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -58,6 +68,11 @@ webhooks:
|
|||||||
sideEffects: None
|
sideEffects: None
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -81,6 +96,11 @@ webhooks:
|
|||||||
sideEffects: None
|
sideEffects: None
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -117,6 +137,11 @@ metadata:
|
|||||||
webhooks:
|
webhooks:
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -140,6 +165,11 @@ webhooks:
|
|||||||
sideEffects: None
|
sideEffects: None
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
@@ -163,6 +193,11 @@ webhooks:
|
|||||||
sideEffects: None
|
sideEffects: None
|
||||||
- admissionReviewVersions:
|
- admissionReviewVersions:
|
||||||
- v1beta1
|
- v1beta1
|
||||||
|
{{- if .Values.scope.singleNamespace }}
|
||||||
|
namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
name: {{ default .Release.Namespace .Values.scope.watchNamespace }}
|
||||||
|
{{- end }}
|
||||||
clientConfig:
|
clientConfig:
|
||||||
{{- if .Values.admissionWebHooks.caBundle }}
|
{{- if .Values.admissionWebHooks.caBundle }}
|
||||||
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
caBundle: {{ .Values.admissionWebHooks.caBundle }}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ spec:
|
|||||||
type: {{ .Values.service.type }}
|
type: {{ .Values.service.type }}
|
||||||
ports:
|
ports:
|
||||||
- port: 443
|
- port: 443
|
||||||
targetPort: 9443
|
targetPort: {{ .Values.webhookPort }}
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
name: https
|
name: https
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
@@ -6,13 +6,16 @@ labels: {}
|
|||||||
|
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
syncPeriod: 10m
|
webhookPort: 9443
|
||||||
|
syncPeriod: 1m
|
||||||
|
defaultScaleDownDelay: 10m
|
||||||
|
|
||||||
enableLeaderElection: true
|
enableLeaderElection: true
|
||||||
# Specifies the controller id for leader election.
|
# Specifies the controller id for leader election.
|
||||||
# Must be unique if more than one controller installed onto the same namespace.
|
# Must be unique if more than one controller installed onto the same namespace.
|
||||||
#leaderElectionId: "actions-runner-controller"
|
#leaderElectionId: "actions-runner-controller"
|
||||||
|
|
||||||
|
# DEPRECATED: This has been removed as unnecessary in #1192
|
||||||
# The controller tries its best not to repeat the duplicate GitHub API call
|
# The controller tries its best not to repeat the duplicate GitHub API call
|
||||||
# within this duration.
|
# within this duration.
|
||||||
# Defaults to syncPeriod - 10s.
|
# Defaults to syncPeriod - 10s.
|
||||||
@@ -106,7 +109,7 @@ metrics:
|
|||||||
enabled: true
|
enabled: true
|
||||||
image:
|
image:
|
||||||
repository: quay.io/brancz/kube-rbac-proxy
|
repository: quay.io/brancz/kube-rbac-proxy
|
||||||
tag: v0.11.0
|
tag: v0.12.0
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
{}
|
{}
|
||||||
@@ -165,11 +168,17 @@ admissionWebHooks:
|
|||||||
{}
|
{}
|
||||||
#caBundle: "Ci0tLS0tQk...<base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate>...tLS0K"
|
#caBundle: "Ci0tLS0tQk...<base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate>...tLS0K"
|
||||||
|
|
||||||
|
# There may be alternatives to setting `hostNetwork: true`, see
|
||||||
|
# https://github.com/actions-runner-controller/actions-runner-controller/issues/1005#issuecomment-993097155
|
||||||
|
#hostNetwork: true
|
||||||
|
|
||||||
githubWebhookServer:
|
githubWebhookServer:
|
||||||
enabled: false
|
enabled: false
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
syncPeriod: 10m
|
syncPeriod: 10m
|
||||||
|
useRunnerGroupsVisibility: false
|
||||||
secret:
|
secret:
|
||||||
|
enabled: false
|
||||||
create: false
|
create: false
|
||||||
name: "github-webhook-server"
|
name: "github-webhook-server"
|
||||||
### GitHub Webhook Configuration
|
### GitHub Webhook Configuration
|
||||||
@@ -215,6 +224,20 @@ githubWebhookServer:
|
|||||||
paths: []
|
paths: []
|
||||||
# - path: /*
|
# - path: /*
|
||||||
# pathType: ImplementationSpecific
|
# pathType: ImplementationSpecific
|
||||||
|
# Extra paths that are not automatically connected to the server. This is useful when working with annotation based services.
|
||||||
|
extraPaths: []
|
||||||
|
# - path: /*
|
||||||
|
# backend:
|
||||||
|
# serviceName: ssl-redirect
|
||||||
|
# servicePort: use-annotation
|
||||||
|
## for Kubernetes >=1.19 (when "networking.k8s.io/v1" is used)
|
||||||
|
# - path: /*
|
||||||
|
# pathType: Prefix
|
||||||
|
# backend:
|
||||||
|
# service:
|
||||||
|
# name: ssl-redirect
|
||||||
|
# port:
|
||||||
|
# name: use-annotation
|
||||||
tls: []
|
tls: []
|
||||||
# - secretName: chart-example-tls
|
# - secretName: chart-example-tls
|
||||||
# hosts:
|
# hosts:
|
||||||
|
|||||||
@@ -29,15 +29,14 @@ import (
|
|||||||
actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/controllers"
|
"github.com/actions-runner-controller/actions-runner-controller/controllers"
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/github"
|
"github.com/actions-runner-controller/actions-runner-controller/github"
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/logging"
|
||||||
"github.com/kelseyhightower/envconfig"
|
"github.com/kelseyhightower/envconfig"
|
||||||
zaplib "go.uber.org/zap"
|
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
||||||
_ "k8s.io/client-go/plugin/pkg/client/auth/exec"
|
_ "k8s.io/client-go/plugin/pkg/client/auth/exec"
|
||||||
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
|
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
|
||||||
_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
|
_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
|
||||||
ctrl "sigs.k8s.io/controller-runtime"
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/log/zap"
|
|
||||||
// +kubebuilder:scaffold:imports
|
// +kubebuilder:scaffold:imports
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -47,11 +46,6 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
logLevelDebug = "debug"
|
|
||||||
logLevelInfo = "info"
|
|
||||||
logLevelWarn = "warn"
|
|
||||||
logLevelError = "error"
|
|
||||||
|
|
||||||
webhookSecretTokenEnvName = "GITHUB_WEBHOOK_SECRET_TOKEN"
|
webhookSecretTokenEnvName = "GITHUB_WEBHOOK_SECRET_TOKEN"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -97,7 +91,7 @@ func main() {
|
|||||||
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
|
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
|
||||||
"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
|
"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
|
||||||
flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set to a lower value like 10 minute, because this corresponds to the minimum time to react on demand change")
|
flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set to a lower value like 10 minute, because this corresponds to the minimum time to react on demand change")
|
||||||
flag.StringVar(&logLevel, "log-level", logLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
|
flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
|
||||||
flag.StringVar(&webhookSecretToken, "github-webhook-secret-token", "", "The personal access token of GitHub.")
|
flag.StringVar(&webhookSecretToken, "github-webhook-secret-token", "", "The personal access token of GitHub.")
|
||||||
flag.StringVar(&c.Token, "github-token", c.Token, "The personal access token of GitHub.")
|
flag.StringVar(&c.Token, "github-token", c.Token, "The personal access token of GitHub.")
|
||||||
flag.Int64Var(&c.AppID, "github-app-id", c.AppID, "The application ID of GitHub App.")
|
flag.Int64Var(&c.AppID, "github-app-id", c.AppID, "The application ID of GitHub App.")
|
||||||
@@ -126,33 +120,28 @@ func main() {
|
|||||||
setupLog.Info("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.")
|
setupLog.Info("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.")
|
||||||
}
|
}
|
||||||
|
|
||||||
logger := zap.New(func(o *zap.Options) {
|
logger := logging.NewLogger(logLevel)
|
||||||
switch logLevel {
|
|
||||||
case logLevelDebug:
|
|
||||||
o.Development = true
|
|
||||||
case logLevelInfo:
|
|
||||||
lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
|
|
||||||
o.Level = &lvl
|
|
||||||
case logLevelWarn:
|
|
||||||
lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
|
|
||||||
o.Level = &lvl
|
|
||||||
case logLevelError:
|
|
||||||
lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
|
|
||||||
o.Level = &lvl
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
|
ctrl.SetLogger(logger)
|
||||||
|
|
||||||
|
// In order to support runner groups with custom visibility (selected repositories), we need to perform some GitHub API calls.
|
||||||
|
// Let the user define if they want to opt-in supporting this option by providing the proper GitHub authentication parameters
|
||||||
|
// Without an opt-in, runner groups with custom visibility won't be supported to save API calls
|
||||||
|
// That is, all runner groups managed by ARC are assumed to be visible to any repositories,
|
||||||
|
// which is wrong when you have one or more non-default runner groups in your organization or enterprise.
|
||||||
if len(c.Token) > 0 || (c.AppID > 0 && c.AppInstallationID > 0 && c.AppPrivateKey != "") || (len(c.BasicauthUsername) > 0 && len(c.BasicauthPassword) > 0) {
|
if len(c.Token) > 0 || (c.AppID > 0 && c.AppInstallationID > 0 && c.AppPrivateKey != "") || (len(c.BasicauthUsername) > 0 && len(c.BasicauthPassword) > 0) {
|
||||||
|
c.Log = &logger
|
||||||
|
|
||||||
ghClient, err = c.NewClient()
|
ghClient, err = c.NewClient()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err)
|
fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err)
|
||||||
setupLog.Error(err, "unable to create controller", "controller", "Runner")
|
setupLog.Error(err, "unable to create controller", "controller", "Runner")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
setupLog.Info("GitHub client is not initialized. Runner groups with custom visibility are not supported. If needed, please provide GitHub authentication. This will incur in extra GitHub API calls")
|
||||||
}
|
}
|
||||||
|
|
||||||
ctrl.SetLogger(logger)
|
|
||||||
|
|
||||||
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
||||||
Scheme: scheme,
|
Scheme: scheme,
|
||||||
SyncPeriod: &syncPeriod,
|
SyncPeriod: &syncPeriod,
|
||||||
@@ -167,8 +156,9 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hraGitHubWebhook := &controllers.HorizontalRunnerAutoscalerGitHubWebhook{
|
hraGitHubWebhook := &controllers.HorizontalRunnerAutoscalerGitHubWebhook{
|
||||||
|
Name: "webhookbasedautoscaler",
|
||||||
Client: mgr.GetClient(),
|
Client: mgr.GetClient(),
|
||||||
Log: ctrl.Log.WithName("controllers").WithName("Runner"),
|
Log: ctrl.Log.WithName("controllers").WithName("webhookbasedautoscaler"),
|
||||||
Recorder: nil,
|
Recorder: nil,
|
||||||
Scheme: mgr.GetScheme(),
|
Scheme: mgr.GetScheme(),
|
||||||
SecretKeyBytes: []byte(webhookSecretToken),
|
SecretKeyBytes: []byte(webhookSecretToken),
|
||||||
@@ -177,7 +167,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err = hraGitHubWebhook.SetupWithManager(mgr); err != nil {
|
if err = hraGitHubWebhook.SetupWithManager(mgr); err != nil {
|
||||||
setupLog.Error(err, "unable to create controller", "controller", "Runner")
|
setupLog.Error(err, "unable to create controller", "controller", "webhookbasedautoscaler")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,9 @@ spec:
|
|||||||
items:
|
items:
|
||||||
description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
|
description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
expirationTime:
|
expirationTime:
|
||||||
format: date-time
|
format: date-time
|
||||||
type: string
|
type: string
|
||||||
@@ -138,6 +141,7 @@ spec:
|
|||||||
status:
|
status:
|
||||||
type: string
|
type: string
|
||||||
types:
|
types:
|
||||||
|
description: 'One of: created, rerequested, or completed'
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
@@ -157,6 +161,9 @@ spec:
|
|||||||
push:
|
push:
|
||||||
description: PushSpec is the condition for triggering scale-up on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
|
description: PushSpec is the condition for triggering scale-up on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
|
||||||
type: object
|
type: object
|
||||||
|
workflowJob:
|
||||||
|
description: https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job
|
||||||
|
type: object
|
||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
|
|||||||
@@ -48,6 +48,11 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
|
description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
replicas:
|
replicas:
|
||||||
nullable: true
|
nullable: true
|
||||||
type: integer
|
type: integer
|
||||||
@@ -1349,33 +1354,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4152,7 +4155,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@@ -45,6 +45,11 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
|
description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
|
||||||
properties:
|
properties:
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners based on potentially outdated Replicas value.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
replicas:
|
replicas:
|
||||||
nullable: true
|
nullable: true
|
||||||
type: integer
|
type: integer
|
||||||
@@ -1346,33 +1351,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4149,7 +4152,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@@ -1292,33 +1292,31 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
dnsConfig:
|
dnsConfig:
|
||||||
items:
|
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
||||||
description: PodDNSConfig defines the DNS parameters of a pod in addition to those generated from DNSPolicy.
|
properties:
|
||||||
properties:
|
nameservers:
|
||||||
nameservers:
|
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
||||||
description: A list of DNS name server IP addresses. This will be appended to the base nameservers generated from DNSPolicy. Duplicated nameservers will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
options:
|
||||||
options:
|
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
||||||
description: A list of DNS resolver options. This will be merged with the base options generated from DNSPolicy. Duplicated entries will be removed. Resolution options given in Options will override those that appear in the base DNSPolicy.
|
items:
|
||||||
items:
|
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
||||||
description: PodDNSConfigOption defines DNS resolver options of a pod.
|
properties:
|
||||||
properties:
|
name:
|
||||||
name:
|
description: Required.
|
||||||
description: Required.
|
type: string
|
||||||
type: string
|
value:
|
||||||
value:
|
type: string
|
||||||
type: string
|
type: object
|
||||||
type: object
|
type: array
|
||||||
type: array
|
searches:
|
||||||
searches:
|
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
||||||
description: A list of DNS search domains for host-name lookup. This will be appended to the base search paths generated from DNSPolicy. Duplicated search paths will be removed.
|
items:
|
||||||
items:
|
type: string
|
||||||
type: string
|
type: array
|
||||||
type: array
|
type: object
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
dockerEnabled:
|
dockerEnabled:
|
||||||
type: boolean
|
type: boolean
|
||||||
dockerEnv:
|
dockerEnv:
|
||||||
@@ -4095,7 +4093,7 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
type: array
|
type: array
|
||||||
topologySpreadConstraint:
|
topologySpreadConstraints:
|
||||||
items:
|
items:
|
||||||
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
description: TopologySpreadConstraint specifies how to spread matching pods among the given topology.
|
||||||
properties:
|
properties:
|
||||||
@@ -5126,6 +5124,9 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
phase:
|
phase:
|
||||||
type: string
|
type: string
|
||||||
|
ready:
|
||||||
|
description: Turns true only if the runner pod is ready.
|
||||||
|
type: boolean
|
||||||
reason:
|
reason:
|
||||||
type: string
|
type: string
|
||||||
registration:
|
registration:
|
||||||
|
|||||||
@@ -55,6 +55,11 @@ spec:
|
|||||||
type: string
|
type: string
|
||||||
dockerdWithinRunnerContainer:
|
dockerdWithinRunnerContainer:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
effectiveTime:
|
||||||
|
description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. It is used to prevent ephemeral runners from unnecessarily recreated.
|
||||||
|
format: date-time
|
||||||
|
nullable: true
|
||||||
|
type: string
|
||||||
enterprise:
|
enterprise:
|
||||||
pattern: ^[^/]+$
|
pattern: ^[^/]+$
|
||||||
type: string
|
type: string
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ spec:
|
|||||||
conversion:
|
conversion:
|
||||||
strategy: Webhook
|
strategy: Webhook
|
||||||
webhook:
|
webhook:
|
||||||
|
conversionReviewVersions: ["v1","v1beta1"]
|
||||||
clientConfig:
|
clientConfig:
|
||||||
# this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
|
# this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank,
|
||||||
# but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
|
# but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager)
|
||||||
|
|||||||
23
config/default/gh-webhook-server-auth-proxy-patch.yaml
Normal file
23
config/default/gh-webhook-server-auth-proxy-patch.yaml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# This patch injects an HTTP proxy sidecar container that performs RBAC
|
||||||
|
# authorization against the Kubernetes API using SubjectAccessReviews.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: github-webhook-server
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: kube-rbac-proxy
|
||||||
|
image: quay.io/brancz/kube-rbac-proxy:v0.10.0
|
||||||
|
args:
|
||||||
|
- '--secure-listen-address=0.0.0.0:8443'
|
||||||
|
- '--upstream=http://127.0.0.1:8080/'
|
||||||
|
- '--logtostderr=true'
|
||||||
|
- '--v=10'
|
||||||
|
ports:
|
||||||
|
- containerPort: 8443
|
||||||
|
name: https
|
||||||
|
- name: github-webhook-server
|
||||||
|
args:
|
||||||
|
- '--metrics-addr=127.0.0.1:8080'
|
||||||
@@ -20,19 +20,22 @@ bases:
|
|||||||
- ../webhook
|
- ../webhook
|
||||||
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
|
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
|
||||||
- ../certmanager
|
- ../certmanager
|
||||||
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
|
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
|
||||||
#- ../prometheus
|
#- ../prometheus
|
||||||
|
# [GH_WEBHOOK_SERVER] To enable the GitHub webhook server, uncomment all sections with 'GH_WEBHOOK_SERVER'.
|
||||||
|
#- ../github-webhook-server
|
||||||
|
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
# Protect the /metrics endpoint by putting it behind auth.
|
# Protect the /metrics endpoint by putting it behind auth.
|
||||||
# Only one of manager_auth_proxy_patch.yaml and
|
# Only one of manager_auth_proxy_patch.yaml and
|
||||||
# manager_prometheus_metrics_patch.yaml should be enabled.
|
# manager_prometheus_metrics_patch.yaml should be enabled.
|
||||||
- manager_auth_proxy_patch.yaml
|
- manager_auth_proxy_patch.yaml
|
||||||
# If you want your controller-manager to expose the /metrics
|
|
||||||
# endpoint w/o any authn/z, uncomment the following line and
|
# If you want your controller-manager to expose the /metrics
|
||||||
# comment manager_auth_proxy_patch.yaml.
|
# endpoint w/o any authn/z, uncomment the following line and
|
||||||
# Only one of manager_auth_proxy_patch.yaml and
|
# comment manager_auth_proxy_patch.yaml.
|
||||||
# manager_prometheus_metrics_patch.yaml should be enabled.
|
# Only one of manager_auth_proxy_patch.yaml and
|
||||||
|
# manager_prometheus_metrics_patch.yaml should be enabled.
|
||||||
#- manager_prometheus_metrics_patch.yaml
|
#- manager_prometheus_metrics_patch.yaml
|
||||||
|
|
||||||
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in crd/kustomization.yaml
|
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in crd/kustomization.yaml
|
||||||
@@ -43,6 +46,10 @@ patchesStrategicMerge:
|
|||||||
# 'CERTMANAGER' needs to be enabled to use ca injection
|
# 'CERTMANAGER' needs to be enabled to use ca injection
|
||||||
- webhookcainjection_patch.yaml
|
- webhookcainjection_patch.yaml
|
||||||
|
|
||||||
|
# [GH_WEBHOOK_SERVER] To enable the GitHub webhook server, uncomment all sections with 'GH_WEBHOOK_SERVER'.
|
||||||
|
# Protect the GitHub webhook server metrics endpoint by putting it behind auth.
|
||||||
|
# - gh-webhook-server-auth-proxy-patch.yaml
|
||||||
|
|
||||||
# the following config is for teaching kustomize how to do var substitution
|
# the following config is for teaching kustomize how to do var substitution
|
||||||
vars:
|
vars:
|
||||||
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
|
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
|
||||||
|
|||||||
@@ -23,4 +23,3 @@ spec:
|
|||||||
args:
|
args:
|
||||||
- "--metrics-addr=127.0.0.1:8080"
|
- "--metrics-addr=127.0.0.1:8080"
|
||||||
- "--enable-leader-election"
|
- "--enable-leader-election"
|
||||||
- "--sync-period=10m"
|
|
||||||
|
|||||||
37
config/github-webhook-server/deployment.yaml
Normal file
37
config/github-webhook-server/deployment.yaml
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
name: github-webhook-server
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: github-webhook-server
|
||||||
|
image: controller:latest
|
||||||
|
command:
|
||||||
|
- '/github-webhook-server'
|
||||||
|
env:
|
||||||
|
- name: GITHUB_WEBHOOK_SECRET_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
key: github_webhook_secret_token
|
||||||
|
name: github-webhook-server
|
||||||
|
optional: true
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
name: http
|
||||||
|
protocol: TCP
|
||||||
|
serviceAccountName: github-webhook-server
|
||||||
|
terminationGracePeriodSeconds: 10
|
||||||
12
config/github-webhook-server/kustomization.yaml
Normal file
12
config/github-webhook-server/kustomization.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
images:
|
||||||
|
- name: controller
|
||||||
|
newName: summerwind/actions-runner-controller
|
||||||
|
newTag: latest
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- rbac.yaml
|
||||||
|
- service.yaml
|
||||||
113
config/github-webhook-server/rbac.yaml
Normal file
113
config/github-webhook-server/rbac.yaml
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
name: github-webhook-server
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
name: github-webhook-server
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- horizontalrunnerautoscalers
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- horizontalrunnerautoscalers/finalizers
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- horizontalrunnerautoscalers/status
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- runnersets
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- runnerdeployments
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- runnerdeployments/finalizers
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- actions.summerwind.dev
|
||||||
|
resources:
|
||||||
|
- runnerdeployments/status
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- apiGroups:
|
||||||
|
- authentication.k8s.io
|
||||||
|
resources:
|
||||||
|
- tokenreviews
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- apiGroups:
|
||||||
|
- authorization.k8s.io
|
||||||
|
resources:
|
||||||
|
- subjectaccessreviews
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
name: github-webhook-server
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: github-webhook-server
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: github-webhook-server
|
||||||
16
config/github-webhook-server/service.yaml
Normal file
16
config/github-webhook-server/service.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
|
name: github-webhook-server
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: http
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/component: github-webhook-server
|
||||||
|
app.kubernetes.io/part-of: actions-runner-controller
|
||||||
@@ -202,6 +202,29 @@ rules:
|
|||||||
verbs:
|
verbs:
|
||||||
- create
|
- create
|
||||||
- patch
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- persistentvolumeclaims
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- persistentvolumes
|
||||||
|
verbs:
|
||||||
|
- delete
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- ""
|
- ""
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
"github.com/google/go-github/v39/github"
|
"github.com/google/go-github/v39/github"
|
||||||
@@ -20,47 +19,6 @@ const (
|
|||||||
defaultScaleDownFactor = 0.7
|
defaultScaleDownFactor = 0.7
|
||||||
)
|
)
|
||||||
|
|
||||||
func getValueAvailableAt(now time.Time, from, to *time.Time, reservedValue int) *int {
|
|
||||||
if to != nil && now.After(*to) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if from != nil && now.Before(*from) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return &reservedValue
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *HorizontalRunnerAutoscalerReconciler) fetchSuggestedReplicasFromCache(hra v1alpha1.HorizontalRunnerAutoscaler) *int {
|
|
||||||
var entry *v1alpha1.CacheEntry
|
|
||||||
|
|
||||||
for i := range hra.Status.CacheEntries {
|
|
||||||
ent := hra.Status.CacheEntries[i]
|
|
||||||
|
|
||||||
if ent.Key != v1alpha1.CacheEntryKeyDesiredReplicas {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if !time.Now().Before(ent.ExpirationTime.Time) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
entry = &ent
|
|
||||||
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
if entry != nil {
|
|
||||||
v := getValueAvailableAt(time.Now(), nil, &entry.ExpirationTime.Time, entry.Value)
|
|
||||||
if v != nil {
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(st scaleTarget, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(st scaleTarget, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||||
if hra.Spec.MinReplicas == nil {
|
if hra.Spec.MinReplicas == nil {
|
||||||
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
|
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
|
||||||
@@ -71,10 +29,8 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(st scaleTa
|
|||||||
metrics := hra.Spec.Metrics
|
metrics := hra.Spec.Metrics
|
||||||
numMetrics := len(metrics)
|
numMetrics := len(metrics)
|
||||||
if numMetrics == 0 {
|
if numMetrics == 0 {
|
||||||
if len(hra.Spec.ScaleUpTriggers) == 0 {
|
// We don't default to anything since ARC 0.23.0
|
||||||
return r.suggestReplicasByQueuedAndInProgressWorkflowRuns(st, hra, nil)
|
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/728
|
||||||
}
|
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
} else if numMetrics > 2 {
|
} else if numMetrics > 2 {
|
||||||
return nil, fmt.Errorf("too many autoscaling metrics configured: It must be 0 to 2, but got %d", numMetrics)
|
return nil, fmt.Errorf("too many autoscaling metrics configured: It must be 0 to 2, but got %d", numMetrics)
|
||||||
@@ -182,7 +138,29 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByQueuedAndInProgr
|
|||||||
if len(allJobs) == 0 {
|
if len(allJobs) == 0 {
|
||||||
fallback_cb()
|
fallback_cb()
|
||||||
} else {
|
} else {
|
||||||
|
JOB:
|
||||||
for _, job := range allJobs {
|
for _, job := range allJobs {
|
||||||
|
runnerLabels := make(map[string]struct{}, len(st.labels))
|
||||||
|
for _, l := range st.labels {
|
||||||
|
runnerLabels[l] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(job.Labels) == 0 {
|
||||||
|
// This shouldn't usually happen
|
||||||
|
r.Log.Info("Detected job with no labels, which is not supported by ARC. Skipping anyway.", "labels", job.Labels, "run_id", job.GetRunID(), "job_id", job.GetID())
|
||||||
|
continue JOB
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, l := range job.Labels {
|
||||||
|
if l == "self-hosted" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := runnerLabels[l]; !ok {
|
||||||
|
continue JOB
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch job.GetStatus() {
|
switch job.GetStatus() {
|
||||||
case "completed":
|
case "completed":
|
||||||
// We add a case for `completed` so it is not counted in `unknown`.
|
// We add a case for `completed` so it is not counted in `unknown`.
|
||||||
|
|||||||
@@ -41,8 +41,12 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
|
|
||||||
metav1Now := metav1.Now()
|
metav1Now := metav1.Now()
|
||||||
testcases := []struct {
|
testcases := []struct {
|
||||||
repo string
|
description string
|
||||||
org string
|
|
||||||
|
repo string
|
||||||
|
org string
|
||||||
|
labels []string
|
||||||
|
|
||||||
fixed *int
|
fixed *int
|
||||||
max *int
|
max *int
|
||||||
min *int
|
min *int
|
||||||
@@ -68,6 +72,19 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}]}"`,
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}]}"`,
|
||||||
want: 3,
|
want: 3,
|
||||||
},
|
},
|
||||||
|
// Explicitly speified the default `self-hosted` label which is ignored by the simulator,
|
||||||
|
// as we assume that GitHub Actions automatically associates the `self-hosted` label to every self-hosted runner.
|
||||||
|
// 3 demanded, max at 3
|
||||||
|
{
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"self-hosted"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(3),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}]}"`,
|
||||||
|
want: 3,
|
||||||
|
},
|
||||||
// 2 demanded, max at 3, currently 3, delay scaling down due to grace period
|
// 2 demanded, max at 3, currently 3, delay scaling down due to grace period
|
||||||
{
|
{
|
||||||
repo: "test/valid",
|
repo: "test/valid",
|
||||||
@@ -152,9 +169,40 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
want: 3,
|
want: 3,
|
||||||
},
|
},
|
||||||
|
|
||||||
// Job-level autoscaling
|
|
||||||
// 5 requested from 3 workflows
|
|
||||||
{
|
{
|
||||||
|
description: "Job-level autoscaling with no explicit runner label (runners have implicit self-hosted, requested self-hosted, 5 jobs from 3 workflows)",
|
||||||
|
repo: "test/valid",
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"completed", "labels":["self-hosted"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
},
|
||||||
|
want: 5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling with no explicit runner label (runners have implicit self-hosted, requested self-hosted+custom, 0 jobs from 3 workflows)",
|
||||||
|
repo: "test/valid",
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling with no label (runners have implicit self-hosted, jobs had no labels, 0 jobs from 3 workflows)",
|
||||||
repo: "test/valid",
|
repo: "test/valid",
|
||||||
min: intPtr(2),
|
min: intPtr(2),
|
||||||
max: intPtr(10),
|
max: intPtr(10),
|
||||||
@@ -166,6 +214,91 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
2: `{"jobs": [{"status": "in_progress"}, {"status":"completed"}]}`,
|
2: `{"jobs": [{"status": "in_progress"}, {"status":"completed"}]}`,
|
||||||
3: `{"jobs": [{"status": "in_progress"}, {"status":"queued"}]}`,
|
3: `{"jobs": [{"status": "in_progress"}, {"status":"queued"}]}`,
|
||||||
},
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling with default runner label (runners have self-hosted only, requested self-hosted+custom, 0 jobs from 3 workflows)",
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"self-hosted"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling with custom runner label (runners have custom2, requested self-hosted+custom, 0 jobs from 5 workflows",
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"custom2"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling with default runner label (runners have self-hosted, requested managed-runner-label, 0 jobs from 3 runs)",
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"self-hosted"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["managed-runner-label"]}, {"status":"queued", "labels":["managed-runner-label"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["managed-runner-label"]}, {"status":"completed", "labels":["managed-runner-label"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["managed-runner-label"]}, {"status":"queued", "labels":["managed-runner-label"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Job-level autoscaling with default + custom runner label (runners have self-hosted+custom, requested self-hosted+custom, 5 jobs from 3 workflows)",
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"self-hosted", "custom"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Job-level autoscaling with custom runner label (runners have custom, requested self-hosted+custom, 5 jobs from 3 workflows)",
|
||||||
|
repo: "test/valid",
|
||||||
|
labels: []string{"custom"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
want: 5,
|
want: 5,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -181,7 +314,12 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
_ = clientgoscheme.AddToScheme(scheme)
|
_ = clientgoscheme.AddToScheme(scheme)
|
||||||
_ = v1alpha1.AddToScheme(scheme)
|
_ = v1alpha1.AddToScheme(scheme)
|
||||||
|
|
||||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
testName := fmt.Sprintf("case %d", i)
|
||||||
|
if tc.description != "" {
|
||||||
|
testName = tc.description
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
server := fake.NewServer(
|
server := fake.NewServer(
|
||||||
fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns, tc.workflowRuns_queued, tc.workflowRuns_in_progress),
|
fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns, tc.workflowRuns_queued, tc.workflowRuns_in_progress),
|
||||||
fake.WithListWorkflowJobsResponse(200, tc.workflowJobs),
|
fake.WithListWorkflowJobsResponse(200, tc.workflowJobs),
|
||||||
@@ -191,9 +329,10 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
client := newGithubClient(server)
|
client := newGithubClient(server)
|
||||||
|
|
||||||
h := &HorizontalRunnerAutoscalerReconciler{
|
h := &HorizontalRunnerAutoscalerReconciler{
|
||||||
Log: log,
|
Log: log,
|
||||||
GitHubClient: client,
|
GitHubClient: client,
|
||||||
Scheme: scheme,
|
Scheme: scheme,
|
||||||
|
DefaultScaleDownDelay: DefaultScaleDownDelay,
|
||||||
}
|
}
|
||||||
|
|
||||||
rd := v1alpha1.RunnerDeployment{
|
rd := v1alpha1.RunnerDeployment{
|
||||||
@@ -206,6 +345,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
Spec: v1alpha1.RunnerSpec{
|
Spec: v1alpha1.RunnerSpec{
|
||||||
RunnerConfig: v1alpha1.RunnerConfig{
|
RunnerConfig: v1alpha1.RunnerConfig{
|
||||||
Repository: tc.repo,
|
Repository: tc.repo,
|
||||||
|
Labels: tc.labels,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -220,6 +360,11 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{
|
Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||||
MaxReplicas: tc.max,
|
MaxReplicas: tc.max,
|
||||||
MinReplicas: tc.min,
|
MinReplicas: tc.min,
|
||||||
|
Metrics: []v1alpha1.MetricSpec{
|
||||||
|
{
|
||||||
|
Type: "TotalNumberOfQueuedAndInProgressWorkflowRuns",
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Status: v1alpha1.HorizontalRunnerAutoscalerStatus{
|
Status: v1alpha1.HorizontalRunnerAutoscalerStatus{
|
||||||
DesiredReplicas: tc.sReplicas,
|
DesiredReplicas: tc.sReplicas,
|
||||||
@@ -234,7 +379,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
|
|||||||
|
|
||||||
st := h.scaleTargetFromRD(context.Background(), rd)
|
st := h.scaleTargetFromRD(context.Background(), rd)
|
||||||
|
|
||||||
got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, st, hra, minReplicas)
|
got, err := h.computeReplicasWithCache(log, metav1Now.Time, st, hra, minReplicas)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if tc.err == "" {
|
if tc.err == "" {
|
||||||
t.Fatalf("unexpected error: expected none, got %v", err)
|
t.Fatalf("unexpected error: expected none, got %v", err)
|
||||||
@@ -258,8 +403,12 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
|
|
||||||
metav1Now := metav1.Now()
|
metav1Now := metav1.Now()
|
||||||
testcases := []struct {
|
testcases := []struct {
|
||||||
repos []string
|
description string
|
||||||
org string
|
|
||||||
|
repos []string
|
||||||
|
org string
|
||||||
|
labels []string
|
||||||
|
|
||||||
fixed *int
|
fixed *int
|
||||||
max *int
|
max *int
|
||||||
min *int
|
min *int
|
||||||
@@ -399,9 +548,43 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
err: "validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment",
|
err: "validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment",
|
||||||
},
|
},
|
||||||
|
|
||||||
// Job-level autoscaling
|
|
||||||
// 5 requested from 3 workflows
|
|
||||||
{
|
{
|
||||||
|
description: "Job-level autoscaling (runners have implicit self-hosted, requested self-hosted, 5 jobs from 3 runs)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"completed", "labels":["self-hosted"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
},
|
||||||
|
want: 5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Job-level autoscaling (runners have explicit self-hosted, requested self-hosted, 5 jobs from 3 runs)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
labels: []string{"self-hosted"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"completed", "labels":["self-hosted"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted"]}, {"status":"queued", "labels":["self-hosted"]}]}`,
|
||||||
|
},
|
||||||
|
want: 5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling (jobs lack labels, 0 requested from 3 workflows)",
|
||||||
org: "test",
|
org: "test",
|
||||||
repos: []string{"valid"},
|
repos: []string{"valid"},
|
||||||
min: intPtr(2),
|
min: intPtr(2),
|
||||||
@@ -414,8 +597,97 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
2: `{"jobs": [{"status": "in_progress"}, {"status":"completed"}]}`,
|
2: `{"jobs": [{"status": "in_progress"}, {"status":"completed"}]}`,
|
||||||
3: `{"jobs": [{"status": "in_progress"}, {"status":"queued"}]}`,
|
3: `{"jobs": [{"status": "in_progress"}, {"status":"queued"}]}`,
|
||||||
},
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling (runners have valid and implicit self-hosted, requested self-hosted+custom, 0 jobs from 3 runs)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling (runners have self-hosted, requested self-hosted+custom, 0 jobs from 3 workflows)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
labels: []string{"self-hosted"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Job-level autoscaling (runners have custom, requested self-hosted+custom, 5 requested from 3 workflows)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
labels: []string{"custom"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
want: 5,
|
want: 5,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Job-level autoscaling (runners have custom, requested custom, 5 requested from 3 workflows)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
labels: []string{"custom"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["custom"]}, {"status":"queued", "labels":["custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["custom"]}, {"status":"completed", "labels":["custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["custom"]}, {"status":"queued", "labels":["custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 5,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
description: "Skipped job-level autoscaling (specified custom2, 0 requested from 3 workflows)",
|
||||||
|
org: "test",
|
||||||
|
repos: []string{"valid"},
|
||||||
|
labels: []string{"custom2"},
|
||||||
|
min: intPtr(2),
|
||||||
|
max: intPtr(10),
|
||||||
|
workflowRuns: `{"total_count": 4, "workflow_runs":[{"id": 1, "status":"queued"}, {"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowRuns_queued: `{"total_count": 1, "workflow_runs":[{"id": 1, "status":"queued"}]}"`,
|
||||||
|
workflowRuns_in_progress: `{"total_count": 2, "workflow_runs":[{"id": 2, "status":"in_progress"}, {"id": 3, "status":"in_progress"}, {"status":"completed"}]}"`,
|
||||||
|
workflowJobs: map[int]string{
|
||||||
|
1: `{"jobs": [{"status":"queued", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
2: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"completed", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
3: `{"jobs": [{"status": "in_progress", "labels":["self-hosted", "custom"]}, {"status":"queued", "labels":["self-hosted", "custom"]}]}`,
|
||||||
|
},
|
||||||
|
want: 2,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range testcases {
|
for i := range testcases {
|
||||||
@@ -429,7 +701,12 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
_ = clientgoscheme.AddToScheme(scheme)
|
_ = clientgoscheme.AddToScheme(scheme)
|
||||||
_ = v1alpha1.AddToScheme(scheme)
|
_ = v1alpha1.AddToScheme(scheme)
|
||||||
|
|
||||||
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
|
testName := fmt.Sprintf("case %d", i)
|
||||||
|
if tc.description != "" {
|
||||||
|
testName = tc.description
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
server := fake.NewServer(
|
server := fake.NewServer(
|
||||||
@@ -441,9 +718,10 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
client := newGithubClient(server)
|
client := newGithubClient(server)
|
||||||
|
|
||||||
h := &HorizontalRunnerAutoscalerReconciler{
|
h := &HorizontalRunnerAutoscalerReconciler{
|
||||||
Log: log,
|
Log: log,
|
||||||
Scheme: scheme,
|
Scheme: scheme,
|
||||||
GitHubClient: client,
|
GitHubClient: client,
|
||||||
|
DefaultScaleDownDelay: DefaultScaleDownDelay,
|
||||||
}
|
}
|
||||||
|
|
||||||
rd := v1alpha1.RunnerDeployment{
|
rd := v1alpha1.RunnerDeployment{
|
||||||
@@ -465,6 +743,7 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
Spec: v1alpha1.RunnerSpec{
|
Spec: v1alpha1.RunnerSpec{
|
||||||
RunnerConfig: v1alpha1.RunnerConfig{
|
RunnerConfig: v1alpha1.RunnerConfig{
|
||||||
Organization: tc.org,
|
Organization: tc.org,
|
||||||
|
Labels: tc.labels,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -502,7 +781,7 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
|
|||||||
|
|
||||||
st := h.scaleTargetFromRD(context.Background(), rd)
|
st := h.scaleTargetFromRD(context.Background(), rd)
|
||||||
|
|
||||||
got, _, _, err := h.computeReplicasWithCache(log, metav1Now.Time, st, hra, minReplicas)
|
got, err := h.computeReplicasWithCache(log, metav1Now.Time, st, hra, minReplicas)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if tc.err == "" {
|
if tc.err == "" {
|
||||||
t.Fatalf("unexpected error: expected none, got %v", err)
|
t.Fatalf("unexpected error: expected none, got %v", err)
|
||||||
|
|||||||
64
controllers/constants.go
Normal file
64
controllers/constants.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
const (
|
||||||
|
LabelKeyRunnerSetName = "runnerset-name"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// This names requires at least one slash to work.
|
||||||
|
// See https://github.com/google/knative-gcp/issues/378
|
||||||
|
runnerPodFinalizerName = "actions.summerwind.dev/runner-pod"
|
||||||
|
|
||||||
|
annotationKeyPrefix = "actions-runner/"
|
||||||
|
|
||||||
|
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
||||||
|
|
||||||
|
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
||||||
|
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"
|
||||||
|
|
||||||
|
// AnnotationKeyRunnerCompletionWaitStartTimestamp is the annotation that is added onto the pod when
|
||||||
|
// ARC decided to wait until the pod to complete by itself, without the need for ARC to unregister the corresponding runner.
|
||||||
|
AnnotationKeyRunnerCompletionWaitStartTimestamp = annotationKeyPrefix + "runner-completion-wait-start-timestamp"
|
||||||
|
|
||||||
|
// unregistarionStartTimestamp is the annotation that contains the time that the requested unregistration process has been started
|
||||||
|
AnnotationKeyUnregistrationStartTimestamp = annotationKeyPrefix + "unregistration-start-timestamp"
|
||||||
|
|
||||||
|
// AnnotationKeyUnregistrationRequestTimestamp is the annotation that contains the time that the unregistration has been requested.
|
||||||
|
// This doesn't immediately start the unregistration. Instead, ARC will first check if the runner has already been registered.
|
||||||
|
// If not, ARC will hold on until the registration to complete first, and only after that it starts the unregistration process.
|
||||||
|
// This is crucial to avoid a race between ARC marking the runner pod for deletion while the actions-runner registers itself to GitHub, leaving the assigned job
|
||||||
|
// hang like forever.
|
||||||
|
AnnotationKeyUnregistrationRequestTimestamp = annotationKeyPrefix + "unregistration-request-timestamp"
|
||||||
|
|
||||||
|
AnnotationKeyRunnerID = annotationKeyPrefix + "id"
|
||||||
|
|
||||||
|
// This can be any value but a larger value can make an unregistration timeout longer than configured in practice.
|
||||||
|
DefaultUnregistrationRetryDelay = time.Minute
|
||||||
|
|
||||||
|
// RetryDelayOnCreateRegistrationError is the delay between retry attempts for runner registration token creation.
|
||||||
|
// Usually, a retry in this case happens when e.g. your PAT has no access to certain scope of runners, like you're using repository admin's token
|
||||||
|
// for creating a broader scoped runner token, like organizationa or enterprise runner token.
|
||||||
|
// Such permission issue will never fixed automatically, so we don't need to retry so often, hence this value.
|
||||||
|
RetryDelayOnCreateRegistrationError = 3 * time.Minute
|
||||||
|
|
||||||
|
// registrationTimeout is the duration until a pod times out after it becomes Ready and Running.
|
||||||
|
// A pod that is timed out can be terminated if needed.
|
||||||
|
registrationTimeout = 10 * time.Minute
|
||||||
|
|
||||||
|
// DefaultRunnerPodRecreationDelayAfterWebhookScale is the delay until syncing the runners with the desired replicas
|
||||||
|
// after a webhook-based scale up.
|
||||||
|
// This is used to prevent ARC from recreating completed runner pods that are deleted soon without being used at all.
|
||||||
|
// In other words, this is used as a timer to wait for the completed runner to emit the next `workflow_job` webhook event to decrease the desired replicas.
|
||||||
|
// So if we set 30 seconds for this, you are basically saying that you would assume GitHub and your installation of ARC to
|
||||||
|
// emit and propagate a workflow_job completion event down to the RunnerSet or RunnerReplicaSet, vha ARC's github webhook server and HRA, in approximately 30 seconds.
|
||||||
|
// In case it actually took more than DefaultRunnerPodRecreationDelayAfterWebhookScale for the workflow_job completion event to arrive,
|
||||||
|
// ARC will recreate the completed runner(s), assuming something went wrong in either GitHub, your K8s cluster, or ARC, so ARC needs to resync anyway.
|
||||||
|
//
|
||||||
|
// See https://github.com/actions-runner-controller/actions-runner-controller/pull/1180
|
||||||
|
DefaultRunnerPodRecreationDelayAfterWebhookScale = 10 * time.Minute
|
||||||
|
|
||||||
|
EnvVarRunnerName = "RUNNER_NAME"
|
||||||
|
EnvVarRunnerToken = "RUNNER_TOKEN"
|
||||||
|
)
|
||||||
@@ -38,6 +38,7 @@ import (
|
|||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/github"
|
"github.com/actions-runner-controller/actions-runner-controller/github"
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/simulator"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -92,7 +93,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
msg := err.Error()
|
msg := err.Error()
|
||||||
if written, err := w.Write([]byte(msg)); err != nil {
|
if written, err := w.Write([]byte(msg)); err != nil {
|
||||||
autoscaler.Log.Error(err, "failed writing http error response", "msg", msg, "written", written)
|
autoscaler.Log.V(1).Error(err, "failed writing http error response", "msg", msg, "written", written)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -241,18 +242,23 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons
|
|||||||
enterpriseSlug,
|
enterpriseSlug,
|
||||||
labels,
|
labels,
|
||||||
)
|
)
|
||||||
|
if target == nil {
|
||||||
if target != nil {
|
break
|
||||||
if e.GetAction() == "queued" {
|
|
||||||
target.Amount = 1
|
|
||||||
} else if e.GetAction() == "completed" {
|
|
||||||
// A nagative amount is processed in the tryScale func as a scale-down request,
|
|
||||||
// that erasese the oldest CapacityReservation with the same amount.
|
|
||||||
// If the first CapacityReservation was with Replicas=1, this negative scale target erases that,
|
|
||||||
// so that the resulting desired replicas decreases by 1.
|
|
||||||
target.Amount = -1
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if e.GetAction() == "queued" {
|
||||||
|
target.Amount = 1
|
||||||
|
break
|
||||||
|
} else if e.GetAction() == "completed" && e.GetWorkflowJob().GetConclusion() != "skipped" {
|
||||||
|
// A nagative amount is processed in the tryScale func as a scale-down request,
|
||||||
|
// that erasese the oldest CapacityReservation with the same amount.
|
||||||
|
// If the first CapacityReservation was with Replicas=1, this negative scale target erases that,
|
||||||
|
// so that the resulting desired replicas decreases by 1.
|
||||||
|
target.Amount = -1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// If the conclusion is "skipped", we will ignore it and fallthrough to the default case.
|
||||||
|
fallthrough
|
||||||
default:
|
default:
|
||||||
ok = true
|
ok = true
|
||||||
|
|
||||||
@@ -289,7 +295,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons
|
|||||||
}
|
}
|
||||||
|
|
||||||
if target == nil {
|
if target == nil {
|
||||||
log.Info(
|
log.V(1).Info(
|
||||||
"Scale target not found. If this is unexpected, ensure that there is exactly one repository-wide or organizational runner deployment that matches this webhook event",
|
"Scale target not found. If this is unexpected, ensure that there is exactly one repository-wide or organizational runner deployment that matches this webhook event",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -350,9 +356,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) findHRAsByKey(ctx con
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, d := range hraList.Items {
|
hras = append(hras, hraList.Items...)
|
||||||
hras = append(hras, d)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return hras, nil
|
return hras, nil
|
||||||
@@ -476,95 +480,105 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getScaleUpTargetWithF
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search for organization runner HRAs in default runner group
|
// Find the potential runner groups first to avoid spending API queries needless. Once/if GitHub improves an
|
||||||
if target, err := scaleTarget(owner); err != nil {
|
|
||||||
log.Error(err, "finding organizational runner", "organization", owner)
|
|
||||||
return nil, err
|
|
||||||
} else if target != nil {
|
|
||||||
log.Info("job scale up target is organizational runners", "organization", owner)
|
|
||||||
return target, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if enterprise != "" {
|
|
||||||
// Search for enterprise runner HRAs in default runner group
|
|
||||||
if target, err := scaleTarget(enterpriseKey(enterprise)); err != nil {
|
|
||||||
log.Error(err, "finding enterprise runner", "enterprise", enterprise)
|
|
||||||
return nil, err
|
|
||||||
} else if target != nil {
|
|
||||||
log.Info("scale up target is default enterprise runners", "enterprise", enterprise)
|
|
||||||
return target, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this point there were no default organization/enterprise runners available to use, try now
|
|
||||||
// searching in runner groups
|
|
||||||
|
|
||||||
// We need to get the potential runner groups first to avoid spending API queries needless. Once/if GitHub improves an
|
|
||||||
// API to find related/linked runner groups from a specific repository this logic could be removed
|
// API to find related/linked runner groups from a specific repository this logic could be removed
|
||||||
availableEnterpriseGroups, availableOrganizationGroups, err := autoscaler.getPotentialGroupsFromHRAs(ctx, enterprise, owner)
|
managedRunnerGroups, err := autoscaler.getManagedRunnerGroupsFromHRAs(ctx, enterprise, owner)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err, "finding potential organization runner groups from HRAs", "organization", owner)
|
log.Error(err, "finding potential organization/enterprise runner groups from HRAs", "organization", owner)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if len(availableEnterpriseGroups) == 0 && len(availableOrganizationGroups) == 0 {
|
if managedRunnerGroups.IsEmpty() {
|
||||||
log.V(1).Info("no repository/organizational/enterprise runner found",
|
log.V(1).Info("no repository/organizational/enterprise runner found",
|
||||||
"repository", repositoryRunnerKey,
|
"repository", repositoryRunnerKey,
|
||||||
"organization", owner,
|
"organization", owner,
|
||||||
"enterprises", enterprise,
|
"enterprises", enterprise,
|
||||||
)
|
)
|
||||||
|
} else {
|
||||||
|
log.V(1).Info("Found some runner groups are managed by ARC", "groups", managedRunnerGroups)
|
||||||
}
|
}
|
||||||
|
|
||||||
var enterpriseGroups []string
|
var visibleGroups *simulator.VisibleRunnerGroups
|
||||||
var organizationGroups []string
|
|
||||||
if autoscaler.GitHubClient != nil {
|
if autoscaler.GitHubClient != nil {
|
||||||
|
simu := &simulator.Simulator{
|
||||||
|
Client: autoscaler.GitHubClient,
|
||||||
|
}
|
||||||
// Get available organization runner groups and enterprise runner groups for a repository
|
// Get available organization runner groups and enterprise runner groups for a repository
|
||||||
// These are the sum of runner groups with repository access = All repositories plus
|
// These are the sum of runner groups with repository access = All repositories and runner groups
|
||||||
// runner groups where owner/repo has access to
|
// where owner/repo has access to as well. The list will include default runner group also if it has access to
|
||||||
enterpriseGroups, organizationGroups, err = autoscaler.GitHubClient.GetRunnerGroupsFromRepository(ctx, owner, repositoryRunnerKey, availableEnterpriseGroups, availableOrganizationGroups)
|
visibleGroups, err = simu.GetRunnerGroupsVisibleToRepository(ctx, owner, repositoryRunnerKey, managedRunnerGroups)
|
||||||
log.V(1).Info("Searching in runner groups", "enterprise.groups", enterpriseGroups, "organization.groups", organizationGroups)
|
log.V(1).Info("Searching in runner groups", "groups", visibleGroups)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err, "Unable to find runner groups from repository", "organization", owner, "repository", repo)
|
log.Error(err, "Unable to find runner groups from repository", "organization", owner, "repository", repo)
|
||||||
return nil, nil
|
return nil, fmt.Errorf("error while finding visible runner groups: %v", err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// For backwards compatibility if GitHub authentication is not configured, we assume all runner groups have
|
// For backwards compatibility if GitHub authentication is not configured, we assume all runner groups have
|
||||||
// visibility=all to honor the previous implementation, therefore any available enterprise/organization runner
|
// visibility=all to honor the previous implementation, therefore any available enterprise/organization runner
|
||||||
// is a potential target for scaling
|
// is a potential target for scaling. This will also avoid doing extra API calls caused by
|
||||||
enterpriseGroups = availableEnterpriseGroups
|
// GitHubClient.GetRunnerGroupsVisibleToRepository in case users are not using custom visibility on their runner
|
||||||
organizationGroups = availableOrganizationGroups
|
// groups or they are using only default runner groups
|
||||||
|
visibleGroups = managedRunnerGroups
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, group := range organizationGroups {
|
scaleTargetKey := func(rg simulator.RunnerGroup) string {
|
||||||
if target, err := scaleTarget(organizationalRunnerGroupKey(owner, group)); err != nil {
|
switch rg.Kind {
|
||||||
log.Error(err, "finding organizational runner group", "organization", owner)
|
case simulator.Default:
|
||||||
return nil, err
|
switch rg.Scope {
|
||||||
} else if target != nil {
|
case simulator.Organization:
|
||||||
log.Info(fmt.Sprintf("job scale up target is organizational runner group %s", target.Name), "organization", owner)
|
return owner
|
||||||
return target, nil
|
case simulator.Enterprise:
|
||||||
|
return enterpriseKey(enterprise)
|
||||||
|
}
|
||||||
|
case simulator.Custom:
|
||||||
|
switch rg.Scope {
|
||||||
|
case simulator.Organization:
|
||||||
|
return organizationalRunnerGroupKey(owner, rg.Name)
|
||||||
|
case simulator.Enterprise:
|
||||||
|
return enterpriseRunnerGroupKey(enterprise, rg.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, group := range enterpriseGroups {
|
log.V(1).Info("groups", "groups", visibleGroups)
|
||||||
if target, err := scaleTarget(enterpriseRunnerGroupKey(enterprise, group)); err != nil {
|
|
||||||
log.Error(err, "finding enterprise runner group", "enterprise", owner)
|
var t *ScaleTarget
|
||||||
return nil, err
|
|
||||||
} else if target != nil {
|
traverseErr := visibleGroups.Traverse(func(rg simulator.RunnerGroup) (bool, error) {
|
||||||
log.Info(fmt.Sprintf("job scale up target is enterprise runner group %s", target.Name), "enterprise", owner)
|
key := scaleTargetKey(rg)
|
||||||
return target, nil
|
|
||||||
|
target, err := scaleTarget(key)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err, "finding runner group", "enterprise", enterprise, "organization", owner, "repository", repo, "key", key)
|
||||||
|
return false, err
|
||||||
|
} else if target == nil {
|
||||||
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t = target
|
||||||
|
log.V(1).Info("job scale up target found", "enterprise", enterprise, "organization", owner, "repository", repo, "key", key)
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if traverseErr != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.V(1).Info("no repository/organizational/enterprise runner found",
|
if t == nil {
|
||||||
"repository", repositoryRunnerKey,
|
log.V(1).Info("no repository/organizational/enterprise runner found",
|
||||||
"organization", owner,
|
"repository", repositoryRunnerKey,
|
||||||
"enterprises", enterprise,
|
"organization", owner,
|
||||||
)
|
"enterprise", enterprise,
|
||||||
return nil, nil
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getPotentialGroupsFromHRAs(ctx context.Context, enterprise, org string) ([]string, []string, error) {
|
func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getManagedRunnerGroupsFromHRAs(ctx context.Context, enterprise, org string) (*simulator.VisibleRunnerGroups, error) {
|
||||||
var enterpriseRunnerGroups []string
|
groups := simulator.NewVisibleRunnerGroups()
|
||||||
var orgRunnerGroups []string
|
|
||||||
ns := autoscaler.Namespace
|
ns := autoscaler.Namespace
|
||||||
|
|
||||||
var defaultListOpts []client.ListOption
|
var defaultListOpts []client.ListOption
|
||||||
@@ -579,36 +593,63 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getPotentialGroupsFro
|
|||||||
|
|
||||||
var hraList v1alpha1.HorizontalRunnerAutoscalerList
|
var hraList v1alpha1.HorizontalRunnerAutoscalerList
|
||||||
if err := autoscaler.List(ctx, &hraList, opts...); err != nil {
|
if err := autoscaler.List(ctx, &hraList, opts...); err != nil {
|
||||||
return orgRunnerGroups, enterpriseRunnerGroups, err
|
return groups, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, hra := range hraList.Items {
|
for _, hra := range hraList.Items {
|
||||||
switch hra.Spec.ScaleTargetRef.Kind {
|
var o, e, g string
|
||||||
|
|
||||||
|
kind := hra.Spec.ScaleTargetRef.Kind
|
||||||
|
switch kind {
|
||||||
case "RunnerSet":
|
case "RunnerSet":
|
||||||
var rs v1alpha1.RunnerSet
|
var rs v1alpha1.RunnerSet
|
||||||
if err := autoscaler.Client.Get(context.Background(), types.NamespacedName{Namespace: hra.Namespace, Name: hra.Spec.ScaleTargetRef.Name}, &rs); err != nil {
|
if err := autoscaler.Client.Get(context.Background(), types.NamespacedName{Namespace: hra.Namespace, Name: hra.Spec.ScaleTargetRef.Name}, &rs); err != nil {
|
||||||
return orgRunnerGroups, enterpriseRunnerGroups, err
|
return groups, err
|
||||||
}
|
|
||||||
if rs.Spec.Organization == org && rs.Spec.Group != "" {
|
|
||||||
orgRunnerGroups = append(orgRunnerGroups, rs.Spec.Group)
|
|
||||||
}
|
|
||||||
if rs.Spec.Enterprise == enterprise && rs.Spec.Group != "" {
|
|
||||||
enterpriseRunnerGroups = append(enterpriseRunnerGroups, rs.Spec.Group)
|
|
||||||
}
|
}
|
||||||
|
o, e, g = rs.Spec.Organization, rs.Spec.Enterprise, rs.Spec.Group
|
||||||
case "RunnerDeployment", "":
|
case "RunnerDeployment", "":
|
||||||
var rd v1alpha1.RunnerDeployment
|
var rd v1alpha1.RunnerDeployment
|
||||||
if err := autoscaler.Client.Get(context.Background(), types.NamespacedName{Namespace: hra.Namespace, Name: hra.Spec.ScaleTargetRef.Name}, &rd); err != nil {
|
if err := autoscaler.Client.Get(context.Background(), types.NamespacedName{Namespace: hra.Namespace, Name: hra.Spec.ScaleTargetRef.Name}, &rd); err != nil {
|
||||||
return orgRunnerGroups, enterpriseRunnerGroups, err
|
return groups, err
|
||||||
}
|
|
||||||
if rd.Spec.Template.Spec.Organization == org && rd.Spec.Template.Spec.Group != "" {
|
|
||||||
orgRunnerGroups = append(orgRunnerGroups, rd.Spec.Template.Spec.Group)
|
|
||||||
}
|
|
||||||
if rd.Spec.Template.Spec.Enterprise == enterprise && rd.Spec.Template.Spec.Group != "" {
|
|
||||||
enterpriseRunnerGroups = append(enterpriseRunnerGroups, rd.Spec.Template.Spec.Group)
|
|
||||||
}
|
}
|
||||||
|
o, e, g = rd.Spec.Template.Spec.Organization, rd.Spec.Template.Spec.Enterprise, rd.Spec.Template.Spec.Group
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unsupported scale target kind: %v", kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
if g != "" && e == "" && o == "" {
|
||||||
|
autoscaler.Log.V(1).Info(
|
||||||
|
"invalid runner group config in scale target: spec.group must be set along with either spec.enterprise or spec.organization",
|
||||||
|
"scaleTargetKind", kind,
|
||||||
|
"group", g,
|
||||||
|
"enterprise", e,
|
||||||
|
"organization", o,
|
||||||
|
)
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if e != enterprise && o != org {
|
||||||
|
autoscaler.Log.V(1).Info(
|
||||||
|
"Skipped scale target irrelevant to event",
|
||||||
|
"eventOrganization", org,
|
||||||
|
"eventEnterprise", enterprise,
|
||||||
|
"scaleTargetKind", kind,
|
||||||
|
"scaleTargetGroup", g,
|
||||||
|
"scaleTargetEnterprise", e,
|
||||||
|
"scaleTargetOrganization", o,
|
||||||
|
)
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rg := simulator.NewRunnerGroupFromProperties(e, o, g)
|
||||||
|
|
||||||
|
if err := groups.Add(rg); err != nil {
|
||||||
|
return groups, fmt.Errorf("failed adding visible group from HRA %s/%s: %w", hra.Namespace, hra.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return enterpriseRunnerGroups, orgRunnerGroups, nil
|
return groups, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getJobScaleTarget(ctx context.Context, name string, labels []string) (*ScaleTarget, error) {
|
func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) getJobScaleTarget(ctx context.Context, name string, labels []string) (*ScaleTarget, error) {
|
||||||
@@ -627,16 +668,29 @@ HRA:
|
|||||||
|
|
||||||
if len(hra.Spec.ScaleUpTriggers) > 1 {
|
if len(hra.Spec.ScaleUpTriggers) > 1 {
|
||||||
autoscaler.Log.V(1).Info("Skipping this HRA as it has too many ScaleUpTriggers to be used in workflow_job based scaling", "hra", hra.Name)
|
autoscaler.Log.V(1).Info("Skipping this HRA as it has too many ScaleUpTriggers to be used in workflow_job based scaling", "hra", hra.Name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(hra.Spec.ScaleUpTriggers) == 0 {
|
||||||
|
autoscaler.Log.V(1).Info("Skipping this HRA as it has no ScaleUpTriggers configured", "hra", hra.Name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
scaleUpTrigger := hra.Spec.ScaleUpTriggers[0]
|
||||||
|
|
||||||
|
if scaleUpTrigger.GitHubEvent == nil {
|
||||||
|
autoscaler.Log.V(1).Info("Skipping this HRA as it has no `githubEvent` scale trigger configured", "hra", hra.Name)
|
||||||
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var duration metav1.Duration
|
if scaleUpTrigger.GitHubEvent.WorkflowJob == nil {
|
||||||
|
autoscaler.Log.V(1).Info("Skipping this HRA as it has no `githubEvent.workflowJob` scale trigger configured", "hra", hra.Name)
|
||||||
|
|
||||||
if len(hra.Spec.ScaleUpTriggers) > 0 {
|
continue
|
||||||
duration = hra.Spec.ScaleUpTriggers[0].Duration
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
duration := scaleUpTrigger.Duration
|
||||||
if duration.Duration <= 0 {
|
if duration.Duration <= 0 {
|
||||||
// Try to release the reserved capacity after at least 10 minutes by default,
|
// Try to release the reserved capacity after at least 10 minutes by default,
|
||||||
// we won't end up in the reserved capacity remained forever in case GitHub somehow stopped sending us "completed" workflow_job events.
|
// we won't end up in the reserved capacity remained forever in case GitHub somehow stopped sending us "completed" workflow_job events.
|
||||||
@@ -732,8 +786,10 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
|
|||||||
capacityReservations := getValidCapacityReservations(copy)
|
capacityReservations := getValidCapacityReservations(copy)
|
||||||
|
|
||||||
if amount > 0 {
|
if amount > 0 {
|
||||||
|
now := time.Now()
|
||||||
copy.Spec.CapacityReservations = append(capacityReservations, v1alpha1.CapacityReservation{
|
copy.Spec.CapacityReservations = append(capacityReservations, v1alpha1.CapacityReservation{
|
||||||
ExpirationTime: metav1.Time{Time: time.Now().Add(target.ScaleUpTrigger.Duration.Duration)},
|
EffectiveTime: metav1.Time{Time: now},
|
||||||
|
ExpirationTime: metav1.Time{Time: now.Add(target.ScaleUpTrigger.Duration.Duration)},
|
||||||
Replicas: amount,
|
Replicas: amount,
|
||||||
})
|
})
|
||||||
} else if amount < 0 {
|
} else if amount < 0 {
|
||||||
@@ -752,10 +808,16 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
|
|||||||
copy.Spec.CapacityReservations = reservations
|
copy.Spec.CapacityReservations = reservations
|
||||||
}
|
}
|
||||||
|
|
||||||
autoscaler.Log.Info(
|
before := len(target.HorizontalRunnerAutoscaler.Spec.CapacityReservations)
|
||||||
"Patching hra for capacityReservations update",
|
expired := before - len(capacityReservations)
|
||||||
"before", target.HorizontalRunnerAutoscaler.Spec.CapacityReservations,
|
after := len(copy.Spec.CapacityReservations)
|
||||||
"after", copy.Spec.CapacityReservations,
|
|
||||||
|
autoscaler.Log.V(1).Info(
|
||||||
|
fmt.Sprintf("Patching hra %s for capacityReservations update", target.HorizontalRunnerAutoscaler.Name),
|
||||||
|
"before", before,
|
||||||
|
"expired", expired,
|
||||||
|
"amount", amount,
|
||||||
|
"after", after,
|
||||||
)
|
)
|
||||||
|
|
||||||
if err := autoscaler.Client.Patch(ctx, copy, client.MergeFrom(&target.HorizontalRunnerAutoscaler)); err != nil {
|
if err := autoscaler.Client.Patch(ctx, copy, client.MergeFrom(&target.HorizontalRunnerAutoscaler)); err != nil {
|
||||||
@@ -791,6 +853,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) SetupWithManager(mgr
|
|||||||
hra := rawObj.(*v1alpha1.HorizontalRunnerAutoscaler)
|
hra := rawObj.(*v1alpha1.HorizontalRunnerAutoscaler)
|
||||||
|
|
||||||
if hra.Spec.ScaleTargetRef.Name == "" {
|
if hra.Spec.ScaleTargetRef.Name == "" {
|
||||||
|
autoscaler.Log.V(1).Info(fmt.Sprintf("scale target ref name not set for hra %s", hra.Name))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -820,7 +883,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) SetupWithManager(mgr
|
|||||||
keys = append(keys, enterpriseKey(enterprise)) // Enterprise runners
|
keys = append(keys, enterpriseKey(enterprise)) // Enterprise runners
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
autoscaler.Log.V(1).Info(fmt.Sprintf("HRA keys indexed for HRA %s: %v", hra.Name, keys))
|
autoscaler.Log.V(2).Info(fmt.Sprintf("HRA keys indexed for HRA %s: %v", hra.Name, keys))
|
||||||
return keys
|
return keys
|
||||||
case "RunnerSet":
|
case "RunnerSet":
|
||||||
var rs v1alpha1.RunnerSet
|
var rs v1alpha1.RunnerSet
|
||||||
@@ -845,7 +908,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) SetupWithManager(mgr
|
|||||||
keys = append(keys, enterpriseRunnerGroupKey(enterprise, rs.Spec.Group)) // Enterprise runner groups
|
keys = append(keys, enterpriseRunnerGroupKey(enterprise, rs.Spec.Group)) // Enterprise runner groups
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
autoscaler.Log.V(1).Info(fmt.Sprintf("HRA keys indexed for HRA %s: %v", hra.Name, keys))
|
autoscaler.Log.V(2).Info(fmt.Sprintf("HRA keys indexed for HRA %s: %v", hra.Name, keys))
|
||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,10 +15,6 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) MatchPushEvent(event
|
|||||||
|
|
||||||
push := g.Push
|
push := g.Push
|
||||||
|
|
||||||
if push == nil {
|
return push != nil
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -138,6 +138,13 @@ func TestWebhookWorkflowJob(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,6 +184,13 @@ func TestWebhookWorkflowJob(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -217,6 +231,13 @@ func TestWebhookWorkflowJob(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -277,6 +298,13 @@ func TestWebhookWorkflowJobWithSelfHostedLabel(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -316,6 +344,13 @@ func TestWebhookWorkflowJobWithSelfHostedLabel(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -356,6 +391,13 @@ func TestWebhookWorkflowJobWithSelfHostedLabel(t *testing.T) {
|
|||||||
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
Name: "test-name",
|
Name: "test-name",
|
||||||
},
|
},
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -47,13 +47,13 @@ const (
|
|||||||
// HorizontalRunnerAutoscalerReconciler reconciles a HorizontalRunnerAutoscaler object
|
// HorizontalRunnerAutoscalerReconciler reconciles a HorizontalRunnerAutoscaler object
|
||||||
type HorizontalRunnerAutoscalerReconciler struct {
|
type HorizontalRunnerAutoscalerReconciler struct {
|
||||||
client.Client
|
client.Client
|
||||||
GitHubClient *github.Client
|
GitHubClient *github.Client
|
||||||
Log logr.Logger
|
Log logr.Logger
|
||||||
Recorder record.EventRecorder
|
Recorder record.EventRecorder
|
||||||
Scheme *runtime.Scheme
|
Scheme *runtime.Scheme
|
||||||
|
CacheDuration time.Duration
|
||||||
CacheDuration time.Duration
|
DefaultScaleDownDelay time.Duration
|
||||||
Name string
|
Name string
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaultReplicas = 1
|
const defaultReplicas = 1
|
||||||
@@ -99,11 +99,33 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(ctx context.Context, re
|
|||||||
return r.reconcile(ctx, req, log, hra, st, func(newDesiredReplicas int) error {
|
return r.reconcile(ctx, req, log, hra, st, func(newDesiredReplicas int) error {
|
||||||
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
|
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
|
||||||
|
|
||||||
|
ephemeral := rd.Spec.Template.Spec.Ephemeral == nil || *rd.Spec.Template.Spec.Ephemeral
|
||||||
|
|
||||||
|
var effectiveTime *time.Time
|
||||||
|
|
||||||
|
for _, r := range hra.Spec.CapacityReservations {
|
||||||
|
t := r.EffectiveTime
|
||||||
|
if effectiveTime == nil || effectiveTime.Before(t.Time) {
|
||||||
|
effectiveTime = &t.Time
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
||||||
if currentDesiredReplicas != newDesiredReplicas {
|
if currentDesiredReplicas != newDesiredReplicas {
|
||||||
copy := rd.DeepCopy()
|
copy := rd.DeepCopy()
|
||||||
copy.Spec.Replicas = &newDesiredReplicas
|
copy.Spec.Replicas = &newDesiredReplicas
|
||||||
|
|
||||||
|
if ephemeral && effectiveTime != nil {
|
||||||
|
copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
|
||||||
|
return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
|
||||||
|
}
|
||||||
|
} else if ephemeral && effectiveTime != nil {
|
||||||
|
copy := rd.DeepCopy()
|
||||||
|
copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
|
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
|
||||||
return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
|
return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
|
||||||
}
|
}
|
||||||
@@ -137,6 +159,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(ctx context.Context, re
|
|||||||
org: rs.Spec.Organization,
|
org: rs.Spec.Organization,
|
||||||
repo: rs.Spec.Repository,
|
repo: rs.Spec.Repository,
|
||||||
replicas: replicas,
|
replicas: replicas,
|
||||||
|
labels: rs.Spec.RunnerConfig.Labels,
|
||||||
getRunnerMap: func() (map[string]struct{}, error) {
|
getRunnerMap: func() (map[string]struct{}, error) {
|
||||||
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
||||||
var runnerPodList corev1.PodList
|
var runnerPodList corev1.PodList
|
||||||
@@ -180,15 +203,38 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(ctx context.Context, re
|
|||||||
}
|
}
|
||||||
currentDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
|
currentDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
|
||||||
|
|
||||||
|
ephemeral := rs.Spec.Ephemeral == nil || *rs.Spec.Ephemeral
|
||||||
|
|
||||||
|
var effectiveTime *time.Time
|
||||||
|
|
||||||
|
for _, r := range hra.Spec.CapacityReservations {
|
||||||
|
t := r.EffectiveTime
|
||||||
|
if effectiveTime == nil || effectiveTime.Before(t.Time) {
|
||||||
|
effectiveTime = &t.Time
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if currentDesiredReplicas != newDesiredReplicas {
|
if currentDesiredReplicas != newDesiredReplicas {
|
||||||
copy := rs.DeepCopy()
|
copy := rs.DeepCopy()
|
||||||
v := int32(newDesiredReplicas)
|
v := int32(newDesiredReplicas)
|
||||||
copy.Spec.Replicas = &v
|
copy.Spec.Replicas = &v
|
||||||
|
|
||||||
|
if ephemeral && effectiveTime != nil {
|
||||||
|
copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rs)); err != nil {
|
||||||
|
return fmt.Errorf("patching runnerset to have %d replicas: %w", newDesiredReplicas, err)
|
||||||
|
}
|
||||||
|
} else if ephemeral && effectiveTime != nil {
|
||||||
|
copy := rs.DeepCopy()
|
||||||
|
copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rs)); err != nil {
|
if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rs)); err != nil {
|
||||||
return fmt.Errorf("patching runnerset to have %d replicas: %w", newDesiredReplicas, err)
|
return fmt.Errorf("patching runnerset to have %d replicas: %w", newDesiredReplicas, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -206,6 +252,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) scaleTargetFromRD(ctx context.Con
|
|||||||
org: rd.Spec.Template.Spec.Organization,
|
org: rd.Spec.Template.Spec.Organization,
|
||||||
repo: rd.Spec.Template.Spec.Repository,
|
repo: rd.Spec.Template.Spec.Repository,
|
||||||
replicas: rd.Spec.Replicas,
|
replicas: rd.Spec.Replicas,
|
||||||
|
labels: rd.Spec.Template.Spec.RunnerConfig.Labels,
|
||||||
getRunnerMap: func() (map[string]struct{}, error) {
|
getRunnerMap: func() (map[string]struct{}, error) {
|
||||||
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
||||||
var runnerList v1alpha1.RunnerList
|
var runnerList v1alpha1.RunnerList
|
||||||
@@ -248,6 +295,7 @@ type scaleTarget struct {
|
|||||||
st, kind string
|
st, kind string
|
||||||
enterprise, repo, org string
|
enterprise, repo, org string
|
||||||
replicas *int
|
replicas *int
|
||||||
|
labels []string
|
||||||
|
|
||||||
getRunnerMap func() (map[string]struct{}, error)
|
getRunnerMap func() (map[string]struct{}, error)
|
||||||
}
|
}
|
||||||
@@ -262,7 +310,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) reconcile(ctx context.Context, re
|
|||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
newDesiredReplicas, computedReplicas, computedReplicasFromCache, err := r.computeReplicasWithCache(log, now, st, hra, minReplicas)
|
newDesiredReplicas, err := r.computeReplicasWithCache(log, now, st, hra, minReplicas)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
||||||
|
|
||||||
@@ -287,24 +335,6 @@ func (r *HorizontalRunnerAutoscalerReconciler) reconcile(ctx context.Context, re
|
|||||||
updated.Status.DesiredReplicas = &newDesiredReplicas
|
updated.Status.DesiredReplicas = &newDesiredReplicas
|
||||||
}
|
}
|
||||||
|
|
||||||
if computedReplicasFromCache == nil {
|
|
||||||
cacheEntries := getValidCacheEntries(updated, now)
|
|
||||||
|
|
||||||
var cacheDuration time.Duration
|
|
||||||
|
|
||||||
if r.CacheDuration > 0 {
|
|
||||||
cacheDuration = r.CacheDuration
|
|
||||||
} else {
|
|
||||||
cacheDuration = 10 * time.Minute
|
|
||||||
}
|
|
||||||
|
|
||||||
updated.Status.CacheEntries = append(cacheEntries, v1alpha1.CacheEntry{
|
|
||||||
Key: v1alpha1.CacheEntryKeyDesiredReplicas,
|
|
||||||
Value: computedReplicas,
|
|
||||||
ExpirationTime: metav1.Time{Time: time.Now().Add(cacheDuration)},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
var overridesSummary string
|
var overridesSummary string
|
||||||
|
|
||||||
if (active != nil && upcoming == nil) || (active != nil && upcoming != nil && active.Period.EndTime.Before(upcoming.Period.StartTime)) {
|
if (active != nil && upcoming == nil) || (active != nil && upcoming != nil && active.Period.EndTime.Before(upcoming.Period.StartTime)) {
|
||||||
@@ -339,18 +369,6 @@ func (r *HorizontalRunnerAutoscalerReconciler) reconcile(ctx context.Context, re
|
|||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getValidCacheEntries(hra *v1alpha1.HorizontalRunnerAutoscaler, now time.Time) []v1alpha1.CacheEntry {
|
|
||||||
var cacheEntries []v1alpha1.CacheEntry
|
|
||||||
|
|
||||||
for _, ent := range hra.Status.CacheEntries {
|
|
||||||
if ent.ExpirationTime.After(now) {
|
|
||||||
cacheEntries = append(cacheEntries, ent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return cacheEntries
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
name := "horizontalrunnerautoscaler-controller"
|
name := "horizontalrunnerautoscaler-controller"
|
||||||
if r.Name != "" {
|
if r.Name != "" {
|
||||||
@@ -443,32 +461,18 @@ func (r *HorizontalRunnerAutoscalerReconciler) getMinReplicas(log logr.Logger, n
|
|||||||
return minReplicas, active, upcoming, nil
|
return minReplicas, active, upcoming, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr.Logger, now time.Time, st scaleTarget, hra v1alpha1.HorizontalRunnerAutoscaler, minReplicas int) (int, int, *int, error) {
|
func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr.Logger, now time.Time, st scaleTarget, hra v1alpha1.HorizontalRunnerAutoscaler, minReplicas int) (int, error) {
|
||||||
var suggestedReplicas int
|
var suggestedReplicas int
|
||||||
|
|
||||||
suggestedReplicasFromCache := r.fetchSuggestedReplicasFromCache(hra)
|
v, err := r.suggestDesiredReplicas(st, hra)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
var cached *int
|
if v == nil {
|
||||||
|
suggestedReplicas = minReplicas
|
||||||
if suggestedReplicasFromCache != nil {
|
|
||||||
cached = suggestedReplicasFromCache
|
|
||||||
|
|
||||||
if cached == nil {
|
|
||||||
suggestedReplicas = minReplicas
|
|
||||||
} else {
|
|
||||||
suggestedReplicas = *cached
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
v, err := r.suggestDesiredReplicas(st, hra)
|
suggestedReplicas = *v
|
||||||
if err != nil {
|
|
||||||
return 0, 0, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if v == nil {
|
|
||||||
suggestedReplicas = minReplicas
|
|
||||||
} else {
|
|
||||||
suggestedReplicas = *v
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var reserved int
|
var reserved int
|
||||||
@@ -496,7 +500,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr
|
|||||||
if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
|
if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
|
||||||
scaleDownDelay = time.Duration(*hra.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
|
scaleDownDelay = time.Duration(*hra.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
|
||||||
} else {
|
} else {
|
||||||
scaleDownDelay = DefaultScaleDownDelay
|
scaleDownDelay = r.DefaultScaleDownDelay
|
||||||
}
|
}
|
||||||
|
|
||||||
var scaleDownDelayUntil *time.Time
|
var scaleDownDelayUntil *time.Time
|
||||||
@@ -527,8 +531,8 @@ func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr
|
|||||||
"min", minReplicas,
|
"min", minReplicas,
|
||||||
}
|
}
|
||||||
|
|
||||||
if cached != nil {
|
if maxReplicas := hra.Spec.MaxReplicas; maxReplicas != nil {
|
||||||
kvs = append(kvs, "cached", *cached)
|
kvs = append(kvs, "max", *maxReplicas)
|
||||||
}
|
}
|
||||||
|
|
||||||
if scaleDownDelayUntil != nil {
|
if scaleDownDelayUntil != nil {
|
||||||
@@ -536,13 +540,9 @@ func (r *HorizontalRunnerAutoscalerReconciler) computeReplicasWithCache(log logr
|
|||||||
kvs = append(kvs, "scale_down_delay_until", scaleDownDelayUntil)
|
kvs = append(kvs, "scale_down_delay_until", scaleDownDelayUntil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if maxReplicas := hra.Spec.MaxReplicas; maxReplicas != nil {
|
|
||||||
kvs = append(kvs, "max", *maxReplicas)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.V(1).Info(fmt.Sprintf("Calculated desired replicas of %d", newDesiredReplicas),
|
log.V(1).Info(fmt.Sprintf("Calculated desired replicas of %d", newDesiredReplicas),
|
||||||
kvs...,
|
kvs...,
|
||||||
)
|
)
|
||||||
|
|
||||||
return newDesiredReplicas, suggestedReplicas, suggestedReplicasFromCache, nil
|
return newDesiredReplicas, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
package controllers
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
|
||||||
"github.com/google/go-cmp/cmp"
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestGetValidCacheEntries(t *testing.T) {
|
|
||||||
now := time.Now()
|
|
||||||
|
|
||||||
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
|
||||||
Status: actionsv1alpha1.HorizontalRunnerAutoscalerStatus{
|
|
||||||
CacheEntries: []actionsv1alpha1.CacheEntry{
|
|
||||||
{
|
|
||||||
Key: "foo",
|
|
||||||
Value: 1,
|
|
||||||
ExpirationTime: metav1.Time{Time: now.Add(-time.Second)},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Key: "foo",
|
|
||||||
Value: 2,
|
|
||||||
ExpirationTime: metav1.Time{Time: now},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Key: "foo",
|
|
||||||
Value: 3,
|
|
||||||
ExpirationTime: metav1.Time{Time: now.Add(time.Second)},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
revs := getValidCacheEntries(hra, now)
|
|
||||||
|
|
||||||
counts := map[string]int{}
|
|
||||||
|
|
||||||
for _, r := range revs {
|
|
||||||
counts[r.Key] += r.Value
|
|
||||||
}
|
|
||||||
|
|
||||||
want := map[string]int{"foo": 3}
|
|
||||||
|
|
||||||
if d := cmp.Diff(want, counts); d != "" {
|
|
||||||
t.Errorf("%s", d)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -108,8 +108,9 @@ func SetupIntegrationTest(ctx2 context.Context) *testEnvironment {
|
|||||||
RunnerImage: "example/runner:test",
|
RunnerImage: "example/runner:test",
|
||||||
DockerImage: "example/docker:test",
|
DockerImage: "example/docker:test",
|
||||||
Name: controllerName("runner"),
|
Name: controllerName("runner"),
|
||||||
RegistrationRecheckInterval: time.Millisecond,
|
RegistrationRecheckInterval: time.Millisecond * 100,
|
||||||
RegistrationRecheckJitter: time.Millisecond,
|
RegistrationRecheckJitter: time.Millisecond * 10,
|
||||||
|
UnregistrationRetryDelay: 1 * time.Second,
|
||||||
}
|
}
|
||||||
err = runnerController.SetupWithManager(mgr)
|
err = runnerController.SetupWithManager(mgr)
|
||||||
Expect(err).NotTo(HaveOccurred(), "failed to setup runner controller")
|
Expect(err).NotTo(HaveOccurred(), "failed to setup runner controller")
|
||||||
@@ -268,7 +269,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
|||||||
|
|
||||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2)
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2)
|
||||||
ExpectHRAStatusCacheEntryLengthEventuallyEquals(ctx, ns.Name, name, 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -371,7 +371,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
|||||||
|
|
||||||
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3)
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3)
|
||||||
ExpectHRAStatusCacheEntryLengthEventuallyEquals(ctx, ns.Name, name, 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -538,6 +537,106 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should create and scale organization's repository runners on workflow_job event", func() {
|
||||||
|
name := "example-runnerdeploy"
|
||||||
|
|
||||||
|
{
|
||||||
|
rd := &actionsv1alpha1.RunnerDeployment{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||||
|
Replicas: intPtr(1),
|
||||||
|
Selector: &metav1.LabelSelector{
|
||||||
|
MatchLabels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Template: actionsv1alpha1.RunnerTemplate{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Labels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerSpec{
|
||||||
|
RunnerConfig: actionsv1alpha1.RunnerConfig{
|
||||||
|
Repository: "test/valid",
|
||||||
|
Image: "bar",
|
||||||
|
Group: "baz",
|
||||||
|
},
|
||||||
|
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
|
||||||
|
Env: []corev1.EnvVar{
|
||||||
|
{Name: "FOO", Value: "FOOVALUE"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale-up to 1 replica via ScaleUpTriggers.GitHubEvent.WorkflowJob based scaling
|
||||||
|
{
|
||||||
|
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||||
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
|
Name: name,
|
||||||
|
},
|
||||||
|
MinReplicas: intPtr(1),
|
||||||
|
MaxReplicas: intPtr(5),
|
||||||
|
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
Amount: 1,
|
||||||
|
Duration: metav1.Duration{Duration: time.Minute},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||||
|
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale-up to 2 replicas on first workflow_job.queued webhook event
|
||||||
|
{
|
||||||
|
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale-up to 3 replicas on second workflow_job.queued webhook event
|
||||||
|
{
|
||||||
|
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not scale-up on third workflow_job.queued webhook event
|
||||||
|
// repo "example" doesn't match our Spec
|
||||||
|
{
|
||||||
|
env.SendWorkflowJobEvent("test", "example", "queued", []string{"self-hosted"})
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after third webhook event")
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
It("should create and scale organization's repository runners only on check_run event", func() {
|
It("should create and scale organization's repository runners only on check_run event", func() {
|
||||||
name := "example-runnerdeploy"
|
name := "example-runnerdeploy"
|
||||||
|
|
||||||
@@ -582,9 +681,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
|||||||
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale-up to 3 replicas by the default TotalNumberOfQueuedAndInProgressWorkflowRuns-based scaling
|
// Scale-up to 1 replica via ScaleUpTriggers.GitHubEvent.CheckRun based scaling
|
||||||
// See workflowRunsFor3Replicas_queued and workflowRunsFor3Replicas_in_progress for GitHub List-Runners API responses
|
|
||||||
// used while testing.
|
|
||||||
{
|
{
|
||||||
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
@@ -1077,24 +1174,176 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should be able to scale visible organization runner group with default labels", func() {
|
||||||
|
name := "example-runnerdeploy"
|
||||||
|
|
||||||
|
{
|
||||||
|
rd := &actionsv1alpha1.RunnerDeployment{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||||
|
Replicas: intPtr(1),
|
||||||
|
Selector: &metav1.LabelSelector{
|
||||||
|
MatchLabels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Template: actionsv1alpha1.RunnerTemplate{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Labels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerSpec{
|
||||||
|
RunnerConfig: actionsv1alpha1.RunnerConfig{
|
||||||
|
Repository: "test/valid",
|
||||||
|
Image: "bar",
|
||||||
|
Group: "baz",
|
||||||
|
},
|
||||||
|
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
|
||||||
|
Env: []corev1.EnvVar{
|
||||||
|
{Name: "FOO", Value: "FOOVALUE"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||||
|
|
||||||
|
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||||
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
|
Name: name,
|
||||||
|
},
|
||||||
|
MinReplicas: intPtr(1),
|
||||||
|
MaxReplicas: intPtr(5),
|
||||||
|
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
Amount: 1,
|
||||||
|
Duration: metav1.Duration{Duration: time.Minute},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||||
|
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale-up to 2 replicas on first workflow_job webhook event
|
||||||
|
{
|
||||||
|
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be able to scale visible organization runner group with custom labels", func() {
|
||||||
|
name := "example-runnerdeploy"
|
||||||
|
|
||||||
|
{
|
||||||
|
rd := &actionsv1alpha1.RunnerDeployment{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerDeploymentSpec{
|
||||||
|
Replicas: intPtr(1),
|
||||||
|
Selector: &metav1.LabelSelector{
|
||||||
|
MatchLabels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Template: actionsv1alpha1.RunnerTemplate{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Labels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerSpec{
|
||||||
|
RunnerConfig: actionsv1alpha1.RunnerConfig{
|
||||||
|
Repository: "test/valid",
|
||||||
|
Image: "bar",
|
||||||
|
Group: "baz",
|
||||||
|
Labels: []string{"custom-label"},
|
||||||
|
},
|
||||||
|
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
|
||||||
|
Env: []corev1.EnvVar{
|
||||||
|
{Name: "FOO", Value: "FOOVALUE"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, rd, "test RunnerDeployment")
|
||||||
|
|
||||||
|
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
|
||||||
|
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
|
||||||
|
Name: name,
|
||||||
|
},
|
||||||
|
MinReplicas: intPtr(1),
|
||||||
|
MaxReplicas: intPtr(5),
|
||||||
|
ScaleDownDelaySecondsAfterScaleUp: intPtr(1),
|
||||||
|
ScaleUpTriggers: []actionsv1alpha1.ScaleUpTrigger{
|
||||||
|
{
|
||||||
|
GitHubEvent: &actionsv1alpha1.GitHubEventScaleUpTriggerSpec{
|
||||||
|
WorkflowJob: &actionsv1alpha1.WorkflowJobSpec{},
|
||||||
|
},
|
||||||
|
Amount: 1,
|
||||||
|
Duration: metav1.Duration{Duration: time.Minute},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ExpectCreate(ctx, hra, "test HorizontalRunnerAutoscaler")
|
||||||
|
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale-up to 2 replicas on first workflow_job webhook event
|
||||||
|
{
|
||||||
|
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"custom-label"})
|
||||||
|
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
|
||||||
|
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
|
||||||
|
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
func ExpectHRAStatusCacheEntryLengthEventuallyEquals(ctx context.Context, ns string, name string, value int, optionalDescriptions ...interface{}) {
|
|
||||||
EventuallyWithOffset(
|
|
||||||
1,
|
|
||||||
func() int {
|
|
||||||
var hra actionsv1alpha1.HorizontalRunnerAutoscaler
|
|
||||||
|
|
||||||
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, &hra)
|
|
||||||
|
|
||||||
ExpectWithOffset(1, err).NotTo(HaveOccurred(), "failed to get test HRA resource")
|
|
||||||
|
|
||||||
return len(hra.Status.CacheEntries)
|
|
||||||
},
|
|
||||||
time.Second*5, time.Millisecond*500).Should(Equal(value), optionalDescriptions...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExpectHRADesiredReplicasEquals(ctx context.Context, ns, name string, desired int, optionalDescriptions ...interface{}) {
|
func ExpectHRADesiredReplicasEquals(ctx context.Context, ns, name string, desired int, optionalDescriptions ...interface{}) {
|
||||||
var rd actionsv1alpha1.HorizontalRunnerAutoscaler
|
var rd actionsv1alpha1.HorizontalRunnerAutoscaler
|
||||||
|
|
||||||
@@ -1166,6 +1415,30 @@ func (env *testEnvironment) SendOrgCheckRunEvent(org, repo, status, action strin
|
|||||||
ExpectWithOffset(1, resp.StatusCode).To(Equal(200))
|
ExpectWithOffset(1, resp.StatusCode).To(Equal(200))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (env *testEnvironment) SendWorkflowJobEvent(org, repo, statusAndAction string, labels []string) {
|
||||||
|
resp, err := sendWebhook(env.webhookServer, "workflow_job", &github.WorkflowJobEvent{
|
||||||
|
WorkflowJob: &github.WorkflowJob{
|
||||||
|
Status: &statusAndAction,
|
||||||
|
Labels: labels,
|
||||||
|
},
|
||||||
|
Org: &github.Organization{
|
||||||
|
Login: github.String(org),
|
||||||
|
},
|
||||||
|
Repo: &github.Repository{
|
||||||
|
Name: github.String(repo),
|
||||||
|
Owner: &github.User{
|
||||||
|
Login: github.String(org),
|
||||||
|
Type: github.String("Organization"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Action: github.String(statusAndAction),
|
||||||
|
})
|
||||||
|
|
||||||
|
ExpectWithOffset(1, err).NotTo(HaveOccurred(), "failed to send workflow_job event")
|
||||||
|
|
||||||
|
ExpectWithOffset(1, resp.StatusCode).To(Equal(200))
|
||||||
|
}
|
||||||
|
|
||||||
func (env *testEnvironment) SendUserPullRequestEvent(owner, repo, branch, action string) {
|
func (env *testEnvironment) SendUserPullRequestEvent(owner, repo, branch, action string) {
|
||||||
resp, err := sendWebhook(env.webhookServer, "pull_request", &github.PullRequestEvent{
|
resp, err := sendWebhook(env.webhookServer, "pull_request", &github.PullRequestEvent{
|
||||||
PullRequest: &github.PullRequest{
|
PullRequest: &github.PullRequest{
|
||||||
|
|||||||
1122
controllers/new_runner_pod_test.go
Normal file
1122
controllers/new_runner_pod_test.go
Normal file
File diff suppressed because it is too large
Load Diff
74
controllers/persistent_volume_claim_controller.go
Normal file
74
controllers/persistent_volume_claim_controller.go
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022 The actions-runner-controller authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/go-logr/logr"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
|
"k8s.io/client-go/tools/record"
|
||||||
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RunnerPersistentVolumeClaimReconciler reconciles a PersistentVolume object
|
||||||
|
type RunnerPersistentVolumeClaimReconciler struct {
|
||||||
|
client.Client
|
||||||
|
Log logr.Logger
|
||||||
|
Recorder record.EventRecorder
|
||||||
|
Scheme *runtime.Scheme
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update;patch;delete
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||||
|
|
||||||
|
func (r *RunnerPersistentVolumeClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||||
|
log := r.Log.WithValues("pvc", req.NamespacedName)
|
||||||
|
|
||||||
|
var pvc corev1.PersistentVolumeClaim
|
||||||
|
if err := r.Get(ctx, req.NamespacedName, &pvc); err != nil {
|
||||||
|
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := syncPVC(ctx, r.Client, log, req.Namespace, &pvc)
|
||||||
|
|
||||||
|
if res == nil {
|
||||||
|
res = &ctrl.Result{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return *res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RunnerPersistentVolumeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
name := "runnerpersistentvolumeclaim-controller"
|
||||||
|
if r.Name != "" {
|
||||||
|
name = r.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||||
|
|
||||||
|
return ctrl.NewControllerManagedBy(mgr).
|
||||||
|
For(&corev1.PersistentVolumeClaim{}).
|
||||||
|
Named(name).
|
||||||
|
Complete(r)
|
||||||
|
}
|
||||||
72
controllers/persistent_volume_controller.go
Normal file
72
controllers/persistent_volume_controller.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022 The actions-runner-controller authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/go-logr/logr"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
|
"k8s.io/client-go/tools/record"
|
||||||
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RunnerPersistentVolumeReconciler reconciles a PersistentVolume object
|
||||||
|
type RunnerPersistentVolumeReconciler struct {
|
||||||
|
client.Client
|
||||||
|
Log logr.Logger
|
||||||
|
Recorder record.EventRecorder
|
||||||
|
Scheme *runtime.Scheme
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||||
|
|
||||||
|
func (r *RunnerPersistentVolumeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||||
|
log := r.Log.WithValues("pv", req.NamespacedName)
|
||||||
|
|
||||||
|
var pv corev1.PersistentVolume
|
||||||
|
if err := r.Get(ctx, req.NamespacedName, &pv); err != nil {
|
||||||
|
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := syncPV(ctx, r.Client, log, req.Namespace, &pv)
|
||||||
|
if res == nil {
|
||||||
|
res = &ctrl.Result{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return *res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RunnerPersistentVolumeReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
name := "runnerpersistentvolume-controller"
|
||||||
|
if r.Name != "" {
|
||||||
|
name = r.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||||
|
|
||||||
|
return ctrl.NewControllerManagedBy(mgr).
|
||||||
|
For(&corev1.PersistentVolume{}).
|
||||||
|
Named(name).
|
||||||
|
Complete(r)
|
||||||
|
}
|
||||||
@@ -59,9 +59,9 @@ func (t *PodRunnerTokenInjector) Handle(ctx context.Context, req admission.Reque
|
|||||||
return newEmptyResponse()
|
return newEmptyResponse()
|
||||||
}
|
}
|
||||||
|
|
||||||
enterprise, okEnterprise := getEnv(runnerContainer, "RUNNER_ENTERPRISE")
|
enterprise, okEnterprise := getEnv(runnerContainer, EnvVarEnterprise)
|
||||||
repo, okRepo := getEnv(runnerContainer, "RUNNER_REPO")
|
repo, okRepo := getEnv(runnerContainer, EnvVarRepo)
|
||||||
org, okOrg := getEnv(runnerContainer, "RUNNER_ORG")
|
org, okOrg := getEnv(runnerContainer, EnvVarOrg)
|
||||||
if !okRepo || !okOrg || !okEnterprise {
|
if !okRepo || !okOrg || !okEnterprise {
|
||||||
return newEmptyResponse()
|
return newEmptyResponse()
|
||||||
}
|
}
|
||||||
@@ -78,9 +78,7 @@ func (t *PodRunnerTokenInjector) Handle(ctx context.Context, req admission.Reque
|
|||||||
|
|
||||||
updated.Annotations[AnnotationKeyTokenExpirationDate] = ts
|
updated.Annotations[AnnotationKeyTokenExpirationDate] = ts
|
||||||
|
|
||||||
if pod.Spec.RestartPolicy != corev1.RestartPolicyOnFailure {
|
forceRunnerPodRestartPolicyNever(updated)
|
||||||
updated.Spec.RestartPolicy = corev1.RestartPolicyOnFailure
|
|
||||||
}
|
|
||||||
|
|
||||||
buf, err := json.Marshal(updated)
|
buf, err := json.Marshal(updated)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -18,15 +18,12 @@ package controllers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/hash"
|
"github.com/actions-runner-controller/actions-runner-controller/hash"
|
||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
gogithub "github.com/google/go-github/v39/github"
|
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
|
||||||
|
|
||||||
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
@@ -50,12 +47,11 @@ const (
|
|||||||
|
|
||||||
retryDelayOnGitHubAPIRateLimitError = 30 * time.Second
|
retryDelayOnGitHubAPIRateLimitError = 30 * time.Second
|
||||||
|
|
||||||
// This is an annotation internal to actions-runner-controller and can change in backward-incompatible ways
|
|
||||||
annotationKeyRegistrationOnly = "actions-runner-controller/registration-only"
|
|
||||||
|
|
||||||
EnvVarOrg = "RUNNER_ORG"
|
EnvVarOrg = "RUNNER_ORG"
|
||||||
EnvVarRepo = "RUNNER_REPO"
|
EnvVarRepo = "RUNNER_REPO"
|
||||||
EnvVarEnterprise = "RUNNER_ENTERPRISE"
|
EnvVarEnterprise = "RUNNER_ENTERPRISE"
|
||||||
|
EnvVarEphemeral = "RUNNER_EPHEMERAL"
|
||||||
|
EnvVarTrue = "true"
|
||||||
)
|
)
|
||||||
|
|
||||||
// RunnerReconciler reconciles a Runner object
|
// RunnerReconciler reconciles a Runner object
|
||||||
@@ -72,6 +68,8 @@ type RunnerReconciler struct {
|
|||||||
Name string
|
Name string
|
||||||
RegistrationRecheckInterval time.Duration
|
RegistrationRecheckInterval time.Duration
|
||||||
RegistrationRecheckJitter time.Duration
|
RegistrationRecheckJitter time.Duration
|
||||||
|
|
||||||
|
UnregistrationRetryDelay time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
|
||||||
@@ -89,12 +87,6 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
|
|||||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err := runner.Validate()
|
|
||||||
if err != nil {
|
|
||||||
log.Info("Failed to validate runner spec", "error", err.Error())
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if runner.ObjectMeta.DeletionTimestamp.IsZero() {
|
if runner.ObjectMeta.DeletionTimestamp.IsZero() {
|
||||||
finalizers, added := addFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
|
finalizers, added := addFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
|
||||||
|
|
||||||
@@ -111,35 +103,16 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Request to remove a runner. DeletionTimestamp was set in the runner - we need to unregister runner
|
// Request to remove a runner. DeletionTimestamp was set in the runner - we need to unregister runner
|
||||||
return r.processRunnerDeletion(runner, ctx, log)
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
|
|
||||||
if registrationOnly && runner.Status.Phase != "" {
|
|
||||||
// At this point we are sure that the registration-only runner has successfully configured and
|
|
||||||
// is of `offline` status, because we set runner.Status.Phase to that of the runner pod only after
|
|
||||||
// successful registration.
|
|
||||||
|
|
||||||
var pod corev1.Pod
|
var pod corev1.Pod
|
||||||
if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
|
if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
|
||||||
if !kerrors.IsNotFound(err) {
|
if !kerrors.IsNotFound(err) {
|
||||||
log.Info(fmt.Sprintf("Retrying soon as we failed to get registration-only runner pod: %v", err))
|
log.Info(fmt.Sprintf("Retrying soon as we failed to get runner pod: %v", err))
|
||||||
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
|
||||||
}
|
|
||||||
} else if err := r.Delete(ctx, &pod); err != nil {
|
|
||||||
if !kerrors.IsNotFound(err) {
|
|
||||||
log.Info(fmt.Sprintf("Retrying soon as we failed to delete registration-only runner pod: %v", err))
|
|
||||||
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
return ctrl.Result{Requeue: true}, nil
|
||||||
}
|
}
|
||||||
|
// Pod was not found
|
||||||
|
return r.processRunnerDeletion(runner, ctx, log, nil)
|
||||||
}
|
}
|
||||||
|
return r.processRunnerDeletion(runner, ctx, log, &pod)
|
||||||
log.Info("Successfully deleted registration-only runner pod to free node and cluster resource")
|
|
||||||
|
|
||||||
// Return here to not recreate the deleted pod, because recreating it is the waste of cluster and node resource,
|
|
||||||
// and also defeats the original purpose of scale-from/to-zero we're trying to implement by using the registration-only runner.
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var pod corev1.Pod
|
var pod corev1.Pod
|
||||||
@@ -151,15 +124,67 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
|
|||||||
return r.processRunnerCreation(ctx, runner, log)
|
return r.processRunnerCreation(ctx, runner, log)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pod already exists
|
phase := string(pod.Status.Phase)
|
||||||
|
if phase == "" {
|
||||||
if !pod.ObjectMeta.DeletionTimestamp.IsZero() {
|
phase = "Created"
|
||||||
return r.processRunnerPodDeletion(ctx, runner, log, pod)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ready := runnerPodReady(&pod)
|
||||||
|
|
||||||
|
if runner.Status.Phase != phase || runner.Status.Ready != ready {
|
||||||
|
if pod.Status.Phase == corev1.PodRunning {
|
||||||
|
// Seeing this message, you can expect the runner to become `Running` soon.
|
||||||
|
log.V(1).Info(
|
||||||
|
"Runner appears to have been registered and running.",
|
||||||
|
"podCreationTimestamp", pod.CreationTimestamp,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
updated := runner.DeepCopy()
|
||||||
|
updated.Status.Phase = phase
|
||||||
|
updated.Status.Ready = ready
|
||||||
|
updated.Status.Reason = pod.Status.Reason
|
||||||
|
updated.Status.Message = pod.Status.Message
|
||||||
|
|
||||||
|
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
||||||
|
log.Error(err, "Failed to update runner status for Phase/Reason/Message")
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ctrl.Result{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runnerPodReady(pod *corev1.Pod) bool {
|
||||||
|
for _, c := range pod.Status.Conditions {
|
||||||
|
if c.Type != corev1.PodReady {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.Status == corev1.ConditionTrue
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func runnerContainerExitCode(pod *corev1.Pod) *int32 {
|
||||||
|
for _, status := range pod.Status.ContainerStatuses {
|
||||||
|
if status.Name != containerName {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if status.State.Terminated != nil {
|
||||||
|
return &status.State.Terminated.ExitCode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
|
||||||
// If pod has ended up succeeded we need to restart it
|
// If pod has ended up succeeded we need to restart it
|
||||||
// Happens e.g. when dind is in runner and run completes
|
// Happens e.g. when dind is in runner and run completes
|
||||||
stopped := pod.Status.Phase == corev1.PodSucceeded
|
stopped := pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed
|
||||||
|
|
||||||
if !stopped {
|
if !stopped {
|
||||||
if pod.Status.Phase == corev1.PodRunning {
|
if pod.Status.Phase == corev1.PodRunning {
|
||||||
@@ -168,338 +193,55 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
|
if status.State.Terminated != nil {
|
||||||
stopped = true
|
stopped = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
restart := stopped
|
return stopped
|
||||||
|
|
||||||
if registrationOnly && stopped {
|
|
||||||
restart = false
|
|
||||||
|
|
||||||
log.Info(
|
|
||||||
"Observed that registration-only runner for scaling-from-zero has successfully stopped. " +
|
|
||||||
"Unlike other pods, this one will be recreated only when runner spec changes.",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
} else if updated {
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
newPod, err := r.newPod(runner)
|
|
||||||
if err != nil {
|
|
||||||
log.Error(err, "Could not create pod")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if registrationOnly {
|
|
||||||
newPod.Spec.Containers[0].Env = append(
|
|
||||||
newPod.Spec.Containers[0].Env,
|
|
||||||
corev1.EnvVar{
|
|
||||||
Name: "RUNNER_REGISTRATION_ONLY",
|
|
||||||
Value: "true",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
var registrationRecheckDelay time.Duration
|
|
||||||
|
|
||||||
// all checks done below only decide whether a restart is needed
|
|
||||||
// if a restart was already decided before, there is no need for the checks
|
|
||||||
// saving API calls and scary log messages
|
|
||||||
if !restart {
|
|
||||||
registrationCheckInterval := time.Minute
|
|
||||||
if r.RegistrationRecheckInterval > 0 {
|
|
||||||
registrationCheckInterval = r.RegistrationRecheckInterval
|
|
||||||
}
|
|
||||||
|
|
||||||
// We want to call ListRunners GitHub Actions API only once per runner per minute.
|
|
||||||
// This if block, in conjunction with:
|
|
||||||
// return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
|
||||||
// achieves that.
|
|
||||||
if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
|
|
||||||
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
|
|
||||||
now := time.Now()
|
|
||||||
|
|
||||||
// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
|
|
||||||
// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
|
|
||||||
// shorter than 1s
|
|
||||||
requeueAfter := nextCheckTime.Sub(now) - time.Second
|
|
||||||
if requeueAfter > 0 {
|
|
||||||
log.Info(
|
|
||||||
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
|
|
||||||
"lastRegistrationCheckTime", lastCheckTime,
|
|
||||||
"registrationCheckInterval", registrationCheckInterval,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Without RequeueAfter, the controller may not retry on scheduled. Instead, it must wait until the
|
|
||||||
// next sync period passes, which can be too much later than nextCheckTime.
|
|
||||||
//
|
|
||||||
// We need to requeue on this reconcilation even though we have already scheduled the initial
|
|
||||||
// requeue previously with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil`.
|
|
||||||
// Apparently, the workqueue used by controller-runtime seems to deduplicate and resets the delay on
|
|
||||||
// other requeues- so the initial scheduled requeue may have been reset due to requeue on
|
|
||||||
// spec/status change.
|
|
||||||
return ctrl.Result{RequeueAfter: requeueAfter}, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
notFound := false
|
|
||||||
offline := false
|
|
||||||
|
|
||||||
runnerBusy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
|
||||||
|
|
||||||
currentTime := time.Now()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
var notFoundException *github.RunnerNotFound
|
|
||||||
var offlineException *github.RunnerOffline
|
|
||||||
if errors.As(err, ¬FoundException) {
|
|
||||||
notFound = true
|
|
||||||
} else if errors.As(err, &offlineException) {
|
|
||||||
offline = true
|
|
||||||
} else {
|
|
||||||
var e *gogithub.RateLimitError
|
|
||||||
if errors.As(err, &e) {
|
|
||||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
|
||||||
// or the runner is still busy.
|
|
||||||
log.Error(
|
|
||||||
err,
|
|
||||||
fmt.Sprintf(
|
|
||||||
"Failed to check if runner is busy due to Github API rate limit. Retrying in %s to avoid excessive GitHub API calls",
|
|
||||||
retryDelayOnGitHubAPIRateLimitError,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// See the `newPod` function called above for more information
|
|
||||||
// about when this hash changes.
|
|
||||||
curHash := pod.Labels[LabelKeyPodTemplateHash]
|
|
||||||
newHash := newPod.Labels[LabelKeyPodTemplateHash]
|
|
||||||
|
|
||||||
if !runnerBusy && curHash != newHash {
|
|
||||||
restart = true
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationTimeout := 10 * time.Minute
|
|
||||||
durationAfterRegistrationTimeout := currentTime.Sub(pod.CreationTimestamp.Add(registrationTimeout))
|
|
||||||
registrationDidTimeout := durationAfterRegistrationTimeout > 0
|
|
||||||
|
|
||||||
if notFound {
|
|
||||||
if registrationDidTimeout {
|
|
||||||
log.Info(
|
|
||||||
"Runner failed to register itself to GitHub in timely manner. "+
|
|
||||||
"Recreating the pod to see if it resolves the issue. "+
|
|
||||||
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
|
||||||
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
|
|
||||||
"podCreationTimestamp", pod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
restart = true
|
|
||||||
} else {
|
|
||||||
log.V(1).Info(
|
|
||||||
"Runner pod exists but we failed to check if runner is busy. Apparently it still needs more time.",
|
|
||||||
"runnerName", runner.Name,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else if offline {
|
|
||||||
if registrationOnly {
|
|
||||||
log.Info(
|
|
||||||
"Observed that registration-only runner for scaling-from-zero has successfully been registered.",
|
|
||||||
"podCreationTimestamp", pod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
} else if registrationDidTimeout {
|
|
||||||
if runnerBusy {
|
|
||||||
log.Info(
|
|
||||||
"Timeout out while waiting for the runner to be online, but observed that it's busy at the same time."+
|
|
||||||
"This is a known (unintuitive) behaviour of a runner that is already running a job. Please see https://github.com/actions-runner-controller/actions-runner-controller/issues/911",
|
|
||||||
"podCreationTimestamp", pod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
log.Info(
|
|
||||||
"Already existing GitHub runner still appears offline . "+
|
|
||||||
"Recreating the pod to see if it resolves the issue. "+
|
|
||||||
"CAUTION: If you see this a lot, you should investigate the root cause. ",
|
|
||||||
"podCreationTimestamp", pod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
restart = true
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.V(1).Info(
|
|
||||||
"Runner pod exists but the GitHub runner appears to be still offline. Waiting for runner to get online ...",
|
|
||||||
"runnerName", runner.Name,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (notFound || (offline && !registrationOnly)) && !registrationDidTimeout {
|
|
||||||
registrationRecheckJitter := 10 * time.Second
|
|
||||||
if r.RegistrationRecheckJitter > 0 {
|
|
||||||
registrationRecheckJitter = r.RegistrationRecheckJitter
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationRecheckDelay = registrationCheckInterval + wait.Jitter(registrationRecheckJitter, 0.1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't do anything if there's no need to restart the runner
|
|
||||||
if !restart {
|
|
||||||
// This guard enables us to update runner.Status.Phase to `Running` only after
|
|
||||||
// the runner is registered to GitHub.
|
|
||||||
if registrationRecheckDelay > 0 {
|
|
||||||
log.V(1).Info(fmt.Sprintf("Rechecking the runner registration in %s", registrationRecheckDelay))
|
|
||||||
|
|
||||||
updated := runner.DeepCopy()
|
|
||||||
updated.Status.LastRegistrationCheckTime = &metav1.Time{Time: time.Now()}
|
|
||||||
|
|
||||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
|
||||||
log.Error(err, "Failed to update runner status for LastRegistrationCheckTime")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if runner.Status.Phase != string(pod.Status.Phase) {
|
|
||||||
if pod.Status.Phase == corev1.PodRunning {
|
|
||||||
// Seeing this message, you can expect the runner to become `Running` soon.
|
|
||||||
log.Info(
|
|
||||||
"Runner appears to have registered and running.",
|
|
||||||
"podCreationTimestamp", pod.CreationTimestamp,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
updated := runner.DeepCopy()
|
|
||||||
updated.Status.Phase = string(pod.Status.Phase)
|
|
||||||
updated.Status.Reason = pod.Status.Reason
|
|
||||||
updated.Status.Message = pod.Status.Message
|
|
||||||
|
|
||||||
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
|
|
||||||
log.Error(err, "Failed to update runner status for Phase/Reason/Message")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete current pod if recreation is needed
|
|
||||||
if err := r.Delete(ctx, &pod); err != nil {
|
|
||||||
log.Error(err, "Failed to delete pod resource")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
|
|
||||||
log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger) (reconcile.Result, error) {
|
func ephemeralRunnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus {
|
||||||
|
if getRunnerEnv(pod, "RUNNER_EPHEMERAL") != "true" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, status := range pod.Status.ContainerStatuses {
|
||||||
|
if status.Name != containerName {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
status := status
|
||||||
|
|
||||||
|
return &status
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
|
||||||
finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
|
finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
|
||||||
|
|
||||||
if removed {
|
if removed {
|
||||||
if len(runner.Status.Registration.Token) > 0 {
|
|
||||||
ok, err := r.unregisterRunner(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, &gogithub.RateLimitError{}) {
|
|
||||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
|
||||||
// or the runner is still busy.
|
|
||||||
log.Error(
|
|
||||||
err,
|
|
||||||
fmt.Sprintf(
|
|
||||||
"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
|
|
||||||
retryDelayOnGitHubAPIRateLimitError,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !ok {
|
|
||||||
log.V(1).Info("Runner no longer exists on GitHub")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.V(1).Info("Runner was never registered on GitHub")
|
|
||||||
}
|
|
||||||
|
|
||||||
newRunner := runner.DeepCopy()
|
newRunner := runner.DeepCopy()
|
||||||
newRunner.ObjectMeta.Finalizers = finalizers
|
newRunner.ObjectMeta.Finalizers = finalizers
|
||||||
|
|
||||||
if err := r.Patch(ctx, newRunner, client.MergeFrom(&runner)); err != nil {
|
if err := r.Patch(ctx, newRunner, client.MergeFrom(&runner)); err != nil {
|
||||||
log.Error(err, "Failed to update runner for finalizer removal")
|
log.Error(err, "Unable to remove finalizer")
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("Removed runner from GitHub", "repository", runner.Spec.Repository, "organization", runner.Spec.Organization)
|
log.Info("Removed finalizer")
|
||||||
}
|
}
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReconciler) processRunnerPodDeletion(ctx context.Context, runner v1alpha1.Runner, log logr.Logger, pod corev1.Pod) (reconcile.Result, error) {
|
|
||||||
deletionTimeout := 1 * time.Minute
|
|
||||||
currentTime := time.Now()
|
|
||||||
deletionDidTimeout := currentTime.Sub(pod.DeletionTimestamp.Add(deletionTimeout)) > 0
|
|
||||||
|
|
||||||
if deletionDidTimeout {
|
|
||||||
log.Info(
|
|
||||||
fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
|
|
||||||
"This is typically the case when a Kubernetes node became unreachable "+
|
|
||||||
"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
|
|
||||||
"podDeletionTimestamp", pod.DeletionTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredDeletionTimeout", deletionTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
var force int64 = 0
|
|
||||||
// forcefully delete runner as we would otherwise get stuck if the node stays unreachable
|
|
||||||
if err := r.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: &force}); err != nil {
|
|
||||||
// probably
|
|
||||||
if !kerrors.IsNotFound(err) {
|
|
||||||
log.Error(err, "Failed to forcefully delete pod resource ...")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
// forceful deletion finally succeeded
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Forcefully deleted pod '%s'", pod.Name))
|
|
||||||
log.Info("Forcefully deleted runner pod", "repository", runner.Spec.Repository)
|
|
||||||
// give kube manager a little time to forcefully delete the stuck pod
|
|
||||||
return ctrl.Result{RequeueAfter: 3 * time.Second}, nil
|
|
||||||
} else {
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1alpha1.Runner, log logr.Logger) (reconcile.Result, error) {
|
func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1alpha1.Runner, log logr.Logger) (reconcile.Result, error) {
|
||||||
if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
|
if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{RequeueAfter: RetryDelayOnCreateRegistrationError}, nil
|
||||||
} else if updated {
|
} else if updated {
|
||||||
return ctrl.Result{Requeue: true}, nil
|
return ctrl.Result{Requeue: true}, nil
|
||||||
}
|
}
|
||||||
@@ -528,37 +270,10 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a
|
|||||||
|
|
||||||
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodCreated", fmt.Sprintf("Created pod '%s'", newPod.Name))
|
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodCreated", fmt.Sprintf("Created pod '%s'", newPod.Name))
|
||||||
log.Info("Created runner pod", "repository", runner.Spec.Repository)
|
log.Info("Created runner pod", "repository", runner.Spec.Repository)
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
|
||||||
runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo)
|
|
||||||
if err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
id := int64(0)
|
|
||||||
for _, runner := range runners {
|
|
||||||
if runner.GetName() == name {
|
|
||||||
if runner.GetBusy() {
|
|
||||||
return false, fmt.Errorf("runner is busy")
|
|
||||||
}
|
|
||||||
id = runner.GetID()
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == int64(0) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) {
|
func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) {
|
||||||
if runner.IsRegisterable() {
|
if runner.IsRegisterable() {
|
||||||
return false, nil
|
return false, nil
|
||||||
@@ -568,6 +283,10 @@ func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v
|
|||||||
|
|
||||||
rt, err := r.GitHubClient.GetRegistrationToken(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
rt, err := r.GitHubClient.GetRegistrationToken(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// An error can be a permanent, permission issue like the below:
|
||||||
|
// POST https://api.github.com/enterprises/YOUR_ENTERPRISE/actions/runners/registration-token: 403 Resource not accessible by integration []
|
||||||
|
// In such case retrying in seconds might not make much sense.
|
||||||
|
|
||||||
r.Recorder.Event(&runner, corev1.EventTypeWarning, "FailedUpdateRegistrationToken", "Updating registration token failed")
|
r.Recorder.Event(&runner, corev1.EventTypeWarning, "FailedUpdateRegistrationToken", "Updating registration token failed")
|
||||||
log.Error(err, "Failed to get new registration token")
|
log.Error(err, "Failed to get new registration token")
|
||||||
return false, err
|
return false, err
|
||||||
@@ -626,6 +345,11 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
|||||||
runner.ObjectMeta.Annotations,
|
runner.ObjectMeta.Annotations,
|
||||||
runner.Spec,
|
runner.Spec,
|
||||||
r.GitHubClient.GithubBaseURL,
|
r.GitHubClient.GithubBaseURL,
|
||||||
|
// Token change should trigger replacement.
|
||||||
|
// We need to include this explicitly here because
|
||||||
|
// runner.Spec does not contain the possibly updated token stored in the
|
||||||
|
// runner status yet.
|
||||||
|
runner.Status.Registration.Token,
|
||||||
)
|
)
|
||||||
|
|
||||||
objectMeta := metav1.ObjectMeta{
|
objectMeta := metav1.ObjectMeta{
|
||||||
@@ -639,31 +363,56 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
|||||||
|
|
||||||
if len(runner.Spec.Containers) == 0 {
|
if len(runner.Spec.Containers) == 0 {
|
||||||
template.Spec.Containers = append(template.Spec.Containers, corev1.Container{
|
template.Spec.Containers = append(template.Spec.Containers, corev1.Container{
|
||||||
Name: "runner",
|
Name: "runner",
|
||||||
ImagePullPolicy: runner.Spec.ImagePullPolicy,
|
|
||||||
EnvFrom: runner.Spec.EnvFrom,
|
|
||||||
Env: runner.Spec.Env,
|
|
||||||
Resources: runner.Spec.Resources,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
if (runner.Spec.DockerEnabled == nil || *runner.Spec.DockerEnabled) && (runner.Spec.DockerdWithinRunnerContainer == nil || !*runner.Spec.DockerdWithinRunnerContainer) {
|
if (runner.Spec.DockerEnabled == nil || *runner.Spec.DockerEnabled) && (runner.Spec.DockerdWithinRunnerContainer == nil || !*runner.Spec.DockerdWithinRunnerContainer) {
|
||||||
template.Spec.Containers = append(template.Spec.Containers, corev1.Container{
|
template.Spec.Containers = append(template.Spec.Containers, corev1.Container{
|
||||||
Name: "docker",
|
Name: "docker",
|
||||||
VolumeMounts: runner.Spec.DockerVolumeMounts,
|
|
||||||
Resources: runner.Spec.DockerdContainerResources,
|
|
||||||
Env: runner.Spec.DockerEnv,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
template.Spec.Containers = runner.Spec.Containers
|
template.Spec.Containers = runner.Spec.Containers
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for i, c := range template.Spec.Containers {
|
||||||
|
switch c.Name {
|
||||||
|
case "runner":
|
||||||
|
if c.ImagePullPolicy == "" {
|
||||||
|
template.Spec.Containers[i].ImagePullPolicy = runner.Spec.ImagePullPolicy
|
||||||
|
}
|
||||||
|
if len(c.EnvFrom) == 0 {
|
||||||
|
template.Spec.Containers[i].EnvFrom = runner.Spec.EnvFrom
|
||||||
|
}
|
||||||
|
if len(c.Env) == 0 {
|
||||||
|
template.Spec.Containers[i].Env = runner.Spec.Env
|
||||||
|
}
|
||||||
|
if len(c.Resources.Requests) == 0 {
|
||||||
|
template.Spec.Containers[i].Resources.Requests = runner.Spec.Resources.Requests
|
||||||
|
}
|
||||||
|
if len(c.Resources.Limits) == 0 {
|
||||||
|
template.Spec.Containers[i].Resources.Limits = runner.Spec.Resources.Limits
|
||||||
|
}
|
||||||
|
case "docker":
|
||||||
|
if len(c.VolumeMounts) == 0 {
|
||||||
|
template.Spec.Containers[i].VolumeMounts = runner.Spec.DockerVolumeMounts
|
||||||
|
}
|
||||||
|
if len(c.Resources.Limits) == 0 {
|
||||||
|
template.Spec.Containers[i].Resources.Limits = runner.Spec.DockerdContainerResources.Limits
|
||||||
|
}
|
||||||
|
if len(c.Resources.Requests) == 0 {
|
||||||
|
template.Spec.Containers[i].Resources.Requests = runner.Spec.DockerdContainerResources.Requests
|
||||||
|
}
|
||||||
|
if len(c.Env) == 0 {
|
||||||
|
template.Spec.Containers[i].Env = runner.Spec.DockerEnv
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template.Spec.SecurityContext = runner.Spec.SecurityContext
|
template.Spec.SecurityContext = runner.Spec.SecurityContext
|
||||||
template.Spec.EnableServiceLinks = runner.Spec.EnableServiceLinks
|
template.Spec.EnableServiceLinks = runner.Spec.EnableServiceLinks
|
||||||
|
|
||||||
registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
|
pod, err := newRunnerPod(runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
|
||||||
|
|
||||||
pod, err := newRunnerPod(template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL, registrationOnly)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return pod, err
|
return pod, err
|
||||||
}
|
}
|
||||||
@@ -743,6 +492,10 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
|||||||
pod.Spec.HostAliases = runnerSpec.HostAliases
|
pod.Spec.HostAliases = runnerSpec.HostAliases
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if runnerSpec.DnsConfig != nil {
|
||||||
|
pod.Spec.DNSConfig = runnerSpec.DnsConfig
|
||||||
|
}
|
||||||
|
|
||||||
if runnerSpec.RuntimeClassName != nil {
|
if runnerSpec.RuntimeClassName != nil {
|
||||||
pod.Spec.RuntimeClassName = runnerSpec.RuntimeClassName
|
pod.Spec.RuntimeClassName = runnerSpec.RuntimeClassName
|
||||||
}
|
}
|
||||||
@@ -762,25 +515,18 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
|||||||
func mutatePod(pod *corev1.Pod, token string) *corev1.Pod {
|
func mutatePod(pod *corev1.Pod, token string) *corev1.Pod {
|
||||||
updated := pod.DeepCopy()
|
updated := pod.DeepCopy()
|
||||||
|
|
||||||
for i := range pod.Spec.Containers {
|
if getRunnerEnv(pod, EnvVarRunnerName) == "" {
|
||||||
if pod.Spec.Containers[i].Name == "runner" {
|
setRunnerEnv(updated, EnvVarRunnerName, pod.ObjectMeta.Name)
|
||||||
updated.Spec.Containers[i].Env = append(updated.Spec.Containers[i].Env,
|
}
|
||||||
corev1.EnvVar{
|
|
||||||
Name: "RUNNER_NAME",
|
if getRunnerEnv(pod, EnvVarRunnerToken) == "" {
|
||||||
Value: pod.ObjectMeta.Name,
|
setRunnerEnv(updated, EnvVarRunnerToken, token)
|
||||||
},
|
|
||||||
corev1.EnvVar{
|
|
||||||
Name: "RUNNER_TOKEN",
|
|
||||||
Value: token,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string, registrationOnly bool) (corev1.Pod, error) {
|
func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||||
var (
|
var (
|
||||||
privileged bool = true
|
privileged bool = true
|
||||||
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
|
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
|
||||||
@@ -789,6 +535,12 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
|
|||||||
dockerdInRunnerPrivileged bool = dockerdInRunner
|
dockerdInRunnerPrivileged bool = dockerdInRunner
|
||||||
)
|
)
|
||||||
|
|
||||||
|
template = *template.DeepCopy()
|
||||||
|
|
||||||
|
// This label selector is used by default when rd.Spec.Selector is empty.
|
||||||
|
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName)
|
||||||
|
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
|
||||||
|
|
||||||
workDir := runnerSpec.WorkDir
|
workDir := runnerSpec.WorkDir
|
||||||
if workDir == "" {
|
if workDir == "" {
|
||||||
workDir = "/runner/_work"
|
workDir = "/runner/_work"
|
||||||
@@ -841,19 +593,11 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
|
|||||||
Value: workDir,
|
Value: workDir,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "RUNNER_EPHEMERAL",
|
Name: EnvVarEphemeral,
|
||||||
Value: fmt.Sprintf("%v", ephemeral),
|
Value: fmt.Sprintf("%v", ephemeral),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
if registrationOnly {
|
|
||||||
env = append(env, corev1.EnvVar{
|
|
||||||
Name: "RUNNER_REGISTRATION_ONLY",
|
|
||||||
Value: "true",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
var seLinuxOptions *corev1.SELinuxOptions
|
var seLinuxOptions *corev1.SELinuxOptions
|
||||||
if template.Spec.SecurityContext != nil {
|
if template.Spec.SecurityContext != nil {
|
||||||
seLinuxOptions = template.Spec.SecurityContext.SELinuxOptions
|
seLinuxOptions = template.Spec.SecurityContext.SELinuxOptions
|
||||||
@@ -911,14 +655,15 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
|
|||||||
if runnerContainer.SecurityContext == nil {
|
if runnerContainer.SecurityContext == nil {
|
||||||
runnerContainer.SecurityContext = &corev1.SecurityContext{}
|
runnerContainer.SecurityContext = &corev1.SecurityContext{}
|
||||||
}
|
}
|
||||||
// Runner need to run privileged if it contains DinD
|
|
||||||
runnerContainer.SecurityContext.Privileged = &dockerdInRunnerPrivileged
|
if runnerContainer.SecurityContext.Privileged == nil {
|
||||||
|
// Runner need to run privileged if it contains DinD
|
||||||
|
runnerContainer.SecurityContext.Privileged = &dockerdInRunnerPrivileged
|
||||||
|
}
|
||||||
|
|
||||||
pod := template.DeepCopy()
|
pod := template.DeepCopy()
|
||||||
|
|
||||||
if pod.Spec.RestartPolicy == "" {
|
forceRunnerPodRestartPolicyNever(pod)
|
||||||
pod.Spec.RestartPolicy = "OnFailure"
|
|
||||||
}
|
|
||||||
|
|
||||||
if mtu := runnerSpec.DockerMTU; mtu != nil && dockerdInRunner {
|
if mtu := runnerSpec.DockerMTU; mtu != nil && dockerdInRunner {
|
||||||
runnerContainer.Env = append(runnerContainer.Env, []corev1.EnvVar{
|
runnerContainer.Env = append(runnerContainer.Env, []corev1.EnvVar{
|
||||||
@@ -996,13 +741,18 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pod.Spec.Volumes = append(pod.Spec.Volumes,
|
if ok, _ := workVolumePresent(pod.Spec.Volumes); !ok {
|
||||||
corev1.Volume{
|
pod.Spec.Volumes = append(pod.Spec.Volumes,
|
||||||
Name: "work",
|
corev1.Volume{
|
||||||
VolumeSource: corev1.VolumeSource{
|
Name: "work",
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
VolumeSource: corev1.VolumeSource{
|
||||||
|
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pod.Spec.Volumes = append(pod.Spec.Volumes,
|
||||||
corev1.Volume{
|
corev1.Volume{
|
||||||
Name: "certs-client",
|
Name: "certs-client",
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
@@ -1011,11 +761,16 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if ok, _ := workVolumeMountPresent(runnerContainer.VolumeMounts); !ok {
|
||||||
|
runnerContainer.VolumeMounts = append(runnerContainer.VolumeMounts,
|
||||||
|
corev1.VolumeMount{
|
||||||
|
Name: "work",
|
||||||
|
MountPath: workDir,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
runnerContainer.VolumeMounts = append(runnerContainer.VolumeMounts,
|
runnerContainer.VolumeMounts = append(runnerContainer.VolumeMounts,
|
||||||
corev1.VolumeMount{
|
|
||||||
Name: "work",
|
|
||||||
MountPath: workDir,
|
|
||||||
},
|
|
||||||
corev1.VolumeMount{
|
corev1.VolumeMount{
|
||||||
Name: "certs-client",
|
Name: "certs-client",
|
||||||
MountPath: "/certs/client",
|
MountPath: "/certs/client",
|
||||||
|
|||||||
414
controllers/runner_graceful_stop.go
Normal file
414
controllers/runner_graceful_stop.go
Normal file
@@ -0,0 +1,414 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/github"
|
||||||
|
"github.com/go-logr/logr"
|
||||||
|
gogithub "github.com/google/go-github/v39/github"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
// tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that
|
||||||
|
// we can delete the runner pod without disrupting a workflow job.
|
||||||
|
//
|
||||||
|
// This function returns a non-nil pointer to corev1.Pod as the first return value
|
||||||
|
// if the runner is considered to have gracefully stopped, hence it's pod is safe for deletion.
|
||||||
|
//
|
||||||
|
// It's a "tick" operation so a graceful stop can take multiple calls to complete.
|
||||||
|
// This function is designed to complete a lengthy graceful stop process in a unblocking way.
|
||||||
|
// When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value.
|
||||||
|
// The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
|
||||||
|
func tickRunnerGracefulStop(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
||||||
|
pod, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationStartTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
if err != nil {
|
||||||
|
return nil, &ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if res, err := ensureRunnerUnregistration(ctx, retryDelay, log, ghClient, c, enterprise, organization, repository, runner, pod); res != nil {
|
||||||
|
return nil, res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pod, err = annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
if err != nil {
|
||||||
|
return nil, &ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return pod, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// annotatePodOnce annotates the pod if it wasn't.
|
||||||
|
// Returns the provided pod as-is if it was already annotated.
|
||||||
|
// Returns the updated pod if the pod was missing the annotation and the update to add the annotation succeeded.
|
||||||
|
func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod *corev1.Pod, k, v string) (*corev1.Pod, error) {
|
||||||
|
if pod == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := getAnnotation(pod, k); ok {
|
||||||
|
return pod, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
updated := pod.DeepCopy()
|
||||||
|
setAnnotation(&updated.ObjectMeta, k, v)
|
||||||
|
if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
|
||||||
|
log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", k))
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Annotated pod", "key", k, "value", v)
|
||||||
|
|
||||||
|
return updated, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the first return value is nil, it's safe to delete the runner pod.
|
||||||
|
// ensureRunnerUnregistration ensures that the GitHub Actions runner backing the given pod
// is unregistered from GitHub before the caller proceeds to delete the runner pod.
//
// It returns (nil, nil) when the runner is confirmed gone (or provably never coming back)
// and the pod is therefore safe to delete, or a non-nil *ctrl.Result when the caller must
// requeue and retry later. `runner` is the runner's name; the numeric runner ID is
// discovered from the pod annotation or, as a fallback, via the GitHub API.
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
	var runnerID *int64

	// Prefer the runner ID cached on the pod annotation to avoid an extra GitHub API call.
	if id, ok := getAnnotation(pod, AnnotationKeyRunnerID); ok {
		v, err := strconv.ParseInt(id, 10, 64)
		if err != nil {
			return &ctrl.Result{}, err
		}

		runnerID = &v
	}

	// Fall back to resolving the ID by listing runners on GitHub and matching by name.
	// Note: this `runner` (a *gogithub.Runner) intentionally shadows the `runner` name parameter.
	if runnerID == nil {
		runner, err := getRunner(ctx, ghClient, enterprise, organization, repository, runner)
		if err != nil {
			return &ctrl.Result{}, err
		}

		if runner != nil && runner.ID != nil {
			runnerID = runner.ID
		}
	}

	// Exit code of the runner container if it has terminated; nil otherwise.
	code := runnerContainerExitCode(pod)

	if pod != nil && pod.Annotations[AnnotationKeyUnregistrationCompleteTimestamp] != "" {
		// If it's already unregistered in the previous reconciliation loop,
		// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
		log.Info("Runner pod is marked as already unregistered.")
	} else if runnerID == nil {
		log.Info(
			"Unregistration started before runner ID is assigned. " +
				"Perhaps the runner pod was terminated by anyone other than ARC? Was it OOM killed? " +
				"Marking unregistration as completed anyway because there's nothing ARC can do.",
		)
	} else if pod != nil && runnerPodOrContainerIsStopped(pod) {
		// If it's an ephemeral runner with the actions/runner container exited with 0,
		// we can safely assume that it has unregistered itself from GitHub Actions
		// so it's natural that RemoveRunner fails due to 404.

		// If pod has ended up succeeded we need to restart it
		// Happens e.g. when dind is in runner and run completes
		log.Info("Runner pod has been stopped with a successful status.")
	} else if pod != nil && pod.Annotations[AnnotationKeyRunnerCompletionWaitStartTimestamp] != "" {
		// The pod has been marked to wait for the ephemeral runner to complete on its own.
		ct := ephemeralRunnerContainerStatus(pod)
		if ct == nil {
			log.Info("Runner pod is annotated to wait for completion, and the runner container is not ephemeral")

			return &ctrl.Result{RequeueAfter: retryDelay}, nil
		}

		lts := ct.LastTerminationState.Terminated
		if lts == nil {
			log.Info("Runner pod is annotated to wait for completion, and the runner container is not restarting")

			return &ctrl.Result{RequeueAfter: retryDelay}, nil
		}

		// Prevent the runner pod from getting stuck in Terminating.
		// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369
		log.Info("Deleting runner pod anyway because it has stopped prematurely. This may leave a dangling runner resource in GitHub Actions",
			"lastState.exitCode", lts.ExitCode,
			"lastState.message", lts.Message,
			"pod.phase", pod.Status.Phase,
		)
	} else if ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, *runnerID); err != nil {
		if errors.Is(err, &gogithub.RateLimitError{}) {
			// We log the underlying error when we failed calling GitHub API to list or unregisters,
			// or the runner is still busy.
			log.Error(
				err,
				fmt.Sprintf(
					"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
					retryDelayOnGitHubAPIRateLimitError,
				),
			)

			return &ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
		}

		log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)

		// Whether the unregistration failed because the runner is still running a job (HTTP 422).
		var runnerBusy bool

		errRes := &gogithub.ErrorResponse{}
		if errors.As(err, &errRes) {
			if errRes.Response.StatusCode == 403 {
				log.Error(err, "Unable to unregister due to permission error. "+
					"Perhaps you've changed the permissions of PAT or GitHub App, or you updated authentication method of ARC in a wrong way? "+
					"ARC considers it as already unregistered and continue removing the pod. "+
					"You may need to remove the runner on GitHub UI.")

				// Treat the runner as unregistered; nothing more ARC can do without the required permission.
				return nil, nil
			}

			// Best-effort lookup of the current runner ID for logging purposes only;
			// the error from getRunner is deliberately ignored here.
			runner, _ := getRunner(ctx, ghClient, enterprise, organization, repository, runner)

			// NOTE: this shadows the outer *int64 runnerID with a plain int64 used only for logging.
			var runnerID int64

			if runner != nil && runner.ID != nil {
				runnerID = *runner.ID
			}

			runnerBusy = errRes.Response.StatusCode == 422

			if runnerBusy && code != nil {
				log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
					"This can happen when the runner container crashed due to an unhandled error, OOM, etc. "+
					"ARC terminates the pod anyway. You'd probably need to manually delete the runner later by calling the GitHub API",
					"runnerExitCode", *code,
					"runnerID", runnerID,
				)

				return nil, nil
			}
		}

		if runnerBusy {
			// We want to prevent spamming the deletion attempts but returning ctrl.Result with RequeueAfter doesn't
			// work as the reconciliation can happen earlier due to pod status update.
			// For ephemeral runners, we can expect it to stop and unregister itself on completion.
			// So we can just wait for the completion without actively retrying unregistration.
			ephemeral := getRunnerEnv(pod, EnvVarEphemeral)
			if ephemeral == "true" {
				_, err = annotatePodOnce(ctx, c, log, pod, AnnotationKeyRunnerCompletionWaitStartTimestamp, time.Now().Format(time.RFC3339))
				if err != nil {
					return &ctrl.Result{}, err
				}

				return &ctrl.Result{}, nil
			}

			log.V(2).Info("Retrying runner unregistration because the static runner is still busy")
			// Otherwise we may end up spamming 422 errors,
			// each call consuming GitHub API rate limit
			// https://github.com/actions-runner-controller/actions-runner-controller/pull/1167#issuecomment-1064213271
			return &ctrl.Result{RequeueAfter: retryDelay}, nil
		}

		return &ctrl.Result{}, err
	} else if ok {
		log.Info("Runner has just been unregistered.")
	} else if pod == nil {
		// `r.unregisterRunner()` returns `false, nil` if the runner is not found on GitHub.
		// However, that doesn't always mean the pod can be safely removed.
		//
		// If the pod does not exist for the runner,
		// it may be due to that the runner pod has never been created.
		// In that case we can safely assume that the runner will never be registered.

		log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.")
	} else if ts := pod.Annotations[AnnotationKeyUnregistrationStartTimestamp]; ts != "" {
		log.Info("Runner unregistration is in-progress. It can take forever to complete if if it's a static runner constantly running jobs."+
			" It can also take very long time if it's an ephemeral runner that is running a log-running job.", "error", err)

		return &ctrl.Result{RequeueAfter: retryDelay}, nil
	} else {
		// A runner and a runner pod that is created by this version of ARC should match
		// any of the above branches.
		//
		// But we leave this catch-all branch for potential backward-compatibility.
		// The caller is expected to take appropriate actions, like annotating the pod as started the unregistration process,
		// and retry later.
		log.V(1).Info("Runner unregistration is being retried later.")

		return &ctrl.Result{RequeueAfter: retryDelay}, nil
	}

	return nil, nil
}
|
||||||
|
|
||||||
|
func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
||||||
|
_, hasRunnerID := getAnnotation(pod, AnnotationKeyRunnerID)
|
||||||
|
if runnerPodOrContainerIsStopped(pod) || hasRunnerID {
|
||||||
|
return pod, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err := getRunner(ctx, ghClient, enterprise, organization, repository, runner)
|
||||||
|
if err != nil {
|
||||||
|
return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r == nil || r.ID == nil {
|
||||||
|
return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
id := *r.ID
|
||||||
|
|
||||||
|
updated, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyRunnerID, fmt.Sprintf("%d", id))
|
||||||
|
if err != nil {
|
||||||
|
return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return updated, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getAnnotation(obj client.Object, key string) (string, bool) {
|
||||||
|
if obj.GetAnnotations() == nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
v, ok := obj.GetAnnotations()[key]
|
||||||
|
|
||||||
|
return v, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func setAnnotation(meta *metav1.ObjectMeta, key, value string) {
|
||||||
|
if meta.Annotations == nil {
|
||||||
|
meta.Annotations = map[string]string{}
|
||||||
|
}
|
||||||
|
|
||||||
|
meta.Annotations[key] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
func podConditionTransitionTime(pod *corev1.Pod, tpe corev1.PodConditionType, v corev1.ConditionStatus) *metav1.Time {
|
||||||
|
for _, c := range pod.Status.Conditions {
|
||||||
|
if c.Type == tpe && c.Status == v {
|
||||||
|
return &c.LastTransitionTime
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func podConditionTransitionTimeAfter(pod *corev1.Pod, tpe corev1.PodConditionType, d time.Duration) bool {
|
||||||
|
c := podConditionTransitionTime(pod, tpe, corev1.ConditionTrue)
|
||||||
|
if c == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.Add(d).Before(time.Now())
|
||||||
|
}
|
||||||
|
|
||||||
|
func podRunnerID(pod *corev1.Pod) string {
|
||||||
|
id, _ := getAnnotation(pod, AnnotationKeyRunnerID)
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
|
||||||
|
func getRunnerEnv(pod *corev1.Pod, key string) string {
|
||||||
|
for _, c := range pod.Spec.Containers {
|
||||||
|
if c.Name == containerName {
|
||||||
|
for _, e := range c.Env {
|
||||||
|
if e.Name == key {
|
||||||
|
return e.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func setRunnerEnv(pod *corev1.Pod, key, value string) {
|
||||||
|
for i := range pod.Spec.Containers {
|
||||||
|
c := pod.Spec.Containers[i]
|
||||||
|
if c.Name == containerName {
|
||||||
|
for j, env := range c.Env {
|
||||||
|
if env.Name == key {
|
||||||
|
pod.Spec.Containers[i].Env[j].Value = value
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pod.Spec.Containers[i].Env = append(c.Env, corev1.EnvVar{Name: key, Value: value})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unregisterRunner unregisters the runner from GitHub Actions by name.
//
// This function returns:
//
// Case 1. (true, nil) when it has successfully unregistered the runner.
// Case 2. (false, nil) when (2-1.) the runner has been already unregistered OR (2-2.) the runner will never be created OR (2-3.) the runner is not created yet and it is about to be registered(hence we couldn't see its existence from GitHub Actions API yet)
// Case 3. (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
// an error returned by GitHub API.
//
// When the returned values is "Case 2. (false, nil)", the caller must handle the three possible sub-cases appropriately.
// In other words, all those three sub-cases cannot be distinguished by this function alone.
//
// - Case "2-1." can happen when e.g. ARC has successfully unregistered in a previous reconciliation loop or it was an ephemeral runner that finished its job run(an ephemeral runner is designed to stop after a job run).
// You'd need to maintain the runner state(i.e. if it's already unregistered or not) somewhere,
// so that you can either not call this function at all if the runner state says it's already unregistered, or determine that it's case "2-1." when you got (false, nil).
//
// - Case "2-2." can happen when e.g. the runner registration token was somehow broken so that `config.sh` within the runner container was never meant to succeed.
// Waiting and retrying forever on this case is not a solution, because `config.sh` won't succeed with a wrong token hence the runner gets stuck in this state forever.
// There isn't a perfect solution to this, but a practical workaround would be to implement a "grace period" in the caller side.
//
// - Case "2-3." can happen when e.g. ARC recreated an ephemeral runner pod in a previous reconciliation loop and then it was requested to delete the runner before the runner comes up.
// If handled inappropriately, this can cause a race condition between a deletion of the runner pod and GitHub scheduling a workflow job onto the runner.
//
// Once successfully detected case "2-1." or "2-2.", you can safely delete the runner pod because you know that the runner won't come back
// as long as you recreate the runner pod.
//
// If it was "2-3.", you need a workaround to avoid the race condition.
//
// You shall introduce a "grace period" mechanism, similar or equal to that required for "Case 2-2.", so that you only ever
// start the runner pod deletion after it's more and more likely that the runner pod is not coming up.
//
// Beware though, you need extra care to set an appropriate grace period depending on your environment.
// There isn't a single right grace period that works for everyone.
// The longer the grace period is, the earlier a cluster resource shortage can occur due to throttled runner pod deletions,
// while the shorter the grace period is, the more likely you may encounter the race issue.
func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo string, id int64) (bool, error) {
	// For the record, historically ARC did not try to call RemoveRunner on a busy runner, but it's no longer true.
	// The reason ARC did so was to let a runner running a job to not stop prematurely.
	//
	// However, we learned that RemoveRunner already has an ability to prevent stopping a busy runner,
	// so ARC doesn't need to do anything special for a graceful runner stop.
	// It can just call RemoveRunner, and if it returned 200 you're guaranteed that the runner will not automatically come back and
	// the runner pod is safe for deletion.
	//
	// Trying to remove a busy runner can result in errors like the following:
	//   failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
	//
	// # NOTES
	//
	// - It can be "status=offline" at the same time but that's another story.
	// - After https://github.com/actions-runner-controller/actions-runner-controller/pull/1127, ListRunners responses that are used to
	//   determine if the runner is busy can be more outdated than before, as those responses are now cached for 60 seconds.
	// - Note that 60 seconds is controlled by the Cache-Control response header provided by GitHub so we don't have a strict control on it but we assume it won't
	//   change from 60 seconds.
	//
	// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
		return false, err
	}

	return true, nil
}
|
||||||
|
|
||||||
|
func getRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (*gogithub.Runner, error) {
|
||||||
|
runners, err := client.ListRunners(ctx, enterprise, org, repo)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, runner := range runners {
|
||||||
|
if runner.GetName() == name {
|
||||||
|
return runner, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
22
controllers/runner_pod.go
Normal file
22
controllers/runner_pod.go
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import corev1 "k8s.io/api/core/v1"
|
||||||
|
|
||||||
|
// Force the runner pod managed by either RunnerDeployment or RunnerSet to have restartPolicy=Never.
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369 for more context.
//
// This is to prevent runner pods from getting stuck in Terminating when a K8s node disappeared along with the runner pod and the runner container within it.
//
// Previously, we used a restartPolicy of OnFailure; it turned out wrong later, and therefore we now set Never.
//
// When the restartPolicy is OnFailure and the node disappeared, runner pods on the node seem to get stuck in state.terminated==nil, state.waiting!=nil, and state.lastTerminationState!=nil,
// and will never become Running.
// It's probably due to that the node onto which the pods have been scheduled will never come back, hence the container restart attempts will never succeed,
// and the pods stay stuck waiting for successful restarts forever.
//
// By forcing runner pods to never restart, we hope there will be no chances of pods being stuck waiting.
func forceRunnerPodRestartPolicyNever(pod *corev1.Pod) {
	// The guard keeps this idempotent and avoids a needless write when the policy is already Never.
	if pod.Spec.RestartPolicy != corev1.RestartPolicyNever {
		pod.Spec.RestartPolicy = corev1.RestartPolicyNever
	}
}
|
||||||
@@ -23,8 +23,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
gogithub "github.com/google/go-github/v39/github"
|
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
|
||||||
|
|
||||||
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
@@ -47,16 +45,10 @@ type RunnerPodReconciler struct {
|
|||||||
Name string
|
Name string
|
||||||
RegistrationRecheckInterval time.Duration
|
RegistrationRecheckInterval time.Duration
|
||||||
RegistrationRecheckJitter time.Duration
|
RegistrationRecheckJitter time.Duration
|
||||||
|
|
||||||
|
UnregistrationRetryDelay time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
// This names requires at least one slash to work.
|
|
||||||
// See https://github.com/google/knative-gcp/issues/378
|
|
||||||
runnerPodFinalizerName = "actions.summerwind.dev/runner-pod"
|
|
||||||
|
|
||||||
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
|
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||||
|
|
||||||
@@ -73,9 +65,19 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var envvars []corev1.EnvVar
|
||||||
|
for _, container := range runnerPod.Spec.Containers {
|
||||||
|
if container.Name == "runner" {
|
||||||
|
envvars = container.Env
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(envvars) == 0 {
|
||||||
|
return ctrl.Result{}, errors.New("Could not determine env vars for runner Pod")
|
||||||
|
}
|
||||||
|
|
||||||
var enterprise, org, repo string
|
var enterprise, org, repo string
|
||||||
|
|
||||||
envvars := runnerPod.Spec.Containers[0].Env
|
|
||||||
for _, e := range envvars {
|
for _, e := range envvars {
|
||||||
switch e.Name {
|
switch e.Name {
|
||||||
case EnvVarEnterprise:
|
case EnvVarEnterprise:
|
||||||
@@ -99,44 +101,36 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Added finalizer")
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
log.V(2).Info("Seen deletion-timestamp is already set")
|
||||||
|
|
||||||
finalizers, removed := removeFinalizer(runnerPod.ObjectMeta.Finalizers, runnerPodFinalizerName)
|
finalizers, removed := removeFinalizer(runnerPod.ObjectMeta.Finalizers, runnerPodFinalizerName)
|
||||||
|
|
||||||
if removed {
|
if removed {
|
||||||
ok, err := r.unregisterRunner(ctx, enterprise, org, repo, runnerPod.Name)
|
// In a standard scenario, the upstream controller, like runnerset-controller, ensures this runner to be gracefully stopped before the deletion timestamp is set.
|
||||||
if err != nil {
|
// But for the case that the user manually deleted it for whatever reason,
|
||||||
if errors.Is(err, &gogithub.RateLimitError{}) {
|
// we have to ensure it to gracefully stop now.
|
||||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
|
||||||
// or the runner is still busy.
|
if res != nil {
|
||||||
log.Error(
|
return *res, err
|
||||||
err,
|
|
||||||
fmt.Sprintf(
|
|
||||||
"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
|
|
||||||
retryDelayOnGitHubAPIRateLimitError,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if !ok {
|
patchedPod := updatedPod.DeepCopy()
|
||||||
log.V(1).Info("Runner no longer exists on GitHub")
|
patchedPod.ObjectMeta.Finalizers = finalizers
|
||||||
}
|
|
||||||
|
|
||||||
newRunner := runnerPod.DeepCopy()
|
// We commit the removal of the finalizer so that Kuberenetes notices it and delete the pod resource from the cluster.
|
||||||
newRunner.ObjectMeta.Finalizers = finalizers
|
if err := r.Patch(ctx, patchedPod, client.MergeFrom(&runnerPod)); err != nil {
|
||||||
|
|
||||||
if err := r.Patch(ctx, newRunner, client.MergeFrom(&runnerPod)); err != nil {
|
|
||||||
log.Error(err, "Failed to update runner for finalizer removal")
|
log.Error(err, "Failed to update runner for finalizer removal")
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("Removed runner from GitHub", "repository", repo, "organization", org)
|
log.V(2).Info("Removed finalizer")
|
||||||
|
|
||||||
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
deletionTimeout := 1 * time.Minute
|
deletionTimeout := 1 * time.Minute
|
||||||
@@ -174,246 +168,45 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// If pod has ended up succeeded we need to restart it
|
po, res, err := ensureRunnerPodRegistered(ctx, log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
|
||||||
// Happens e.g. when dind is in runner and run completes
|
if res != nil {
|
||||||
stopped := runnerPod.Status.Phase == corev1.PodSucceeded
|
return *res, err
|
||||||
|
|
||||||
if !stopped {
|
|
||||||
if runnerPod.Status.Phase == corev1.PodRunning {
|
|
||||||
for _, status := range runnerPod.Status.ContainerStatuses {
|
|
||||||
if status.Name != containerName {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
|
|
||||||
stopped = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
restart := stopped
|
runnerPod = *po
|
||||||
|
|
||||||
var registrationRecheckDelay time.Duration
|
if _, unregistrationRequested := getAnnotation(&runnerPod, AnnotationKeyUnregistrationRequestTimestamp); unregistrationRequested {
|
||||||
|
log.V(2).Info("Progressing unregistration because unregistration-request timestamp is set")
|
||||||
|
|
||||||
// all checks done below only decide whether a restart is needed
|
// At this point we're sure that DeletionTimestamp is not set yet, but the unregistration process is triggered by an upstream controller like runnerset-controller.
|
||||||
// if a restart was already decided before, there is no need for the checks
|
//
|
||||||
// saving API calls and scary log messages
|
// In a standard scenario, ARC starts the unregistration process before marking the pod for deletion at all,
|
||||||
if !restart {
|
// so that it isn't subject to terminationGracePeriod and can safely take hours to finish it's work.
|
||||||
registrationCheckInterval := time.Minute
|
_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
|
||||||
if r.RegistrationRecheckInterval > 0 {
|
if res != nil {
|
||||||
registrationCheckInterval = r.RegistrationRecheckInterval
|
return *res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
lastCheckTimeStr := runnerPod.Annotations[AnnotationKeyLastRegistrationCheckTime]
|
// At this point we are sure that the runner has successfully unregistered, hence is safe to be deleted.
|
||||||
|
// But we don't delete the pod here. Instead, let the upstream controller/parent object to delete this pod as
|
||||||
var lastCheckTime *time.Time
|
// a part of a cascade deletion.
|
||||||
|
// This is to avoid a parent object, like statefulset, to recreate the deleted pod.
|
||||||
if lastCheckTimeStr != "" {
|
// If the pod was recreated, it will start a registration process and that may race with the statefulset deleting the pod.
|
||||||
t, err := time.Parse(time.RFC3339, lastCheckTimeStr)
|
log.V(2).Info("Unregistration seems complete")
|
||||||
if err != nil {
|
|
||||||
log.Error(err, "failed to parase last check time %q", lastCheckTimeStr)
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
lastCheckTime = &t
|
|
||||||
}
|
|
||||||
|
|
||||||
// We want to call ListRunners GitHub Actions API only once per runner per minute.
|
|
||||||
// This if block, in conjunction with:
|
|
||||||
// return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
|
||||||
// achieves that.
|
|
||||||
if lastCheckTime != nil {
|
|
||||||
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
|
|
||||||
now := time.Now()
|
|
||||||
|
|
||||||
// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
|
|
||||||
// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
|
|
||||||
// shorter than 1s
|
|
||||||
requeueAfter := nextCheckTime.Sub(now) - time.Second
|
|
||||||
if requeueAfter > 0 {
|
|
||||||
log.Info(
|
|
||||||
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
|
|
||||||
"lastRegistrationCheckTime", lastCheckTime,
|
|
||||||
"registrationCheckInterval", registrationCheckInterval,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Without RequeueAfter, the controller may not retry on scheduled. Instead, it must wait until the
|
|
||||||
// next sync period passes, which can be too much later than nextCheckTime.
|
|
||||||
//
|
|
||||||
// We need to requeue on this reconcilation even though we have already scheduled the initial
|
|
||||||
// requeue previously with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil`.
|
|
||||||
// Apparently, the workqueue used by controller-runtime seems to deduplicate and resets the delay on
|
|
||||||
// other requeues- so the initial scheduled requeue may have been reset due to requeue on
|
|
||||||
// spec/status change.
|
|
||||||
return ctrl.Result{RequeueAfter: requeueAfter}, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
notFound := false
|
|
||||||
offline := false
|
|
||||||
|
|
||||||
_, err := r.GitHubClient.IsRunnerBusy(ctx, enterprise, org, repo, runnerPod.Name)
|
|
||||||
|
|
||||||
currentTime := time.Now()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
var notFoundException *github.RunnerNotFound
|
|
||||||
var offlineException *github.RunnerOffline
|
|
||||||
if errors.As(err, ¬FoundException) {
|
|
||||||
notFound = true
|
|
||||||
} else if errors.As(err, &offlineException) {
|
|
||||||
offline = true
|
|
||||||
} else {
|
|
||||||
var e *gogithub.RateLimitError
|
|
||||||
if errors.As(err, &e) {
|
|
||||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
|
||||||
// or the runner is still busy.
|
|
||||||
log.Error(
|
|
||||||
err,
|
|
||||||
fmt.Sprintf(
|
|
||||||
"Failed to check if runner is busy due to Github API rate limit. Retrying in %s to avoid excessive GitHub API calls",
|
|
||||||
retryDelayOnGitHubAPIRateLimitError,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationTimeout := 10 * time.Minute
|
|
||||||
durationAfterRegistrationTimeout := currentTime.Sub(runnerPod.CreationTimestamp.Add(registrationTimeout))
|
|
||||||
registrationDidTimeout := durationAfterRegistrationTimeout > 0
|
|
||||||
|
|
||||||
if notFound {
|
|
||||||
if registrationDidTimeout {
|
|
||||||
log.Info(
|
|
||||||
"Runner failed to register itself to GitHub in timely manner. "+
|
|
||||||
"Recreating the pod to see if it resolves the issue. "+
|
|
||||||
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
|
||||||
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
|
|
||||||
"podCreationTimestamp", runnerPod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
restart = true
|
|
||||||
} else {
|
|
||||||
log.V(1).Info(
|
|
||||||
"Runner pod exists but we failed to check if runner is busy. Apparently it still needs more time.",
|
|
||||||
"runnerName", runnerPod.Name,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else if offline {
|
|
||||||
if registrationDidTimeout {
|
|
||||||
log.Info(
|
|
||||||
"Already existing GitHub runner still appears offline . "+
|
|
||||||
"Recreating the pod to see if it resolves the issue. "+
|
|
||||||
"CAUTION: If you see this a lot, you should investigate the root cause. ",
|
|
||||||
"podCreationTimestamp", runnerPod.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
restart = true
|
|
||||||
} else {
|
|
||||||
log.V(1).Info(
|
|
||||||
"Runner pod exists but the GitHub runner appears to be still offline. Waiting for runner to get online ...",
|
|
||||||
"runnerName", runnerPod.Name,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (notFound || offline) && !registrationDidTimeout {
|
|
||||||
registrationRecheckJitter := 10 * time.Second
|
|
||||||
if r.RegistrationRecheckJitter > 0 {
|
|
||||||
registrationRecheckJitter = r.RegistrationRecheckJitter
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationRecheckDelay = registrationCheckInterval + wait.Jitter(registrationRecheckJitter, 0.1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't do anything if there's no need to restart the runner
|
|
||||||
if !restart {
|
|
||||||
// This guard enables us to update runner.Status.Phase to `Running` only after
|
|
||||||
// the runner is registered to GitHub.
|
|
||||||
if registrationRecheckDelay > 0 {
|
|
||||||
log.V(1).Info(fmt.Sprintf("Rechecking the runner registration in %s", registrationRecheckDelay))
|
|
||||||
|
|
||||||
updated := runnerPod.DeepCopy()
|
|
||||||
t := time.Now().Format(time.RFC3339)
|
|
||||||
updated.Annotations[AnnotationKeyLastRegistrationCheckTime] = t
|
|
||||||
|
|
||||||
if err := r.Patch(ctx, updated, client.MergeFrom(&runnerPod)); err != nil {
|
|
||||||
log.Error(err, "Failed to update runner pod annotation for LastRegistrationCheckTime")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Seeing this message, you can expect the runner to become `Running` soon.
|
|
||||||
log.Info(
|
|
||||||
"Runner appears to have registered and running.",
|
|
||||||
"podCreationTimestamp", runnerPod.CreationTimestamp,
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete current pod if recreation is needed
|
|
||||||
if err := r.Delete(ctx, &runnerPod); err != nil {
|
|
||||||
log.Error(err, "Failed to delete pod resource")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Recorder.Event(&runnerPod, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", runnerPod.Name))
|
|
||||||
log.Info("Deleted runner pod", "name", runnerPod.Name)
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
func (r *RunnerPodReconciler) unregistrationRetryDelay() time.Duration {
|
||||||
runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo)
|
retryDelay := DefaultUnregistrationRetryDelay
|
||||||
if err != nil {
|
|
||||||
return false, err
|
if r.UnregistrationRetryDelay > 0 {
|
||||||
|
retryDelay = r.UnregistrationRetryDelay
|
||||||
}
|
}
|
||||||
|
return retryDelay
|
||||||
var busy bool
|
|
||||||
|
|
||||||
id := int64(0)
|
|
||||||
for _, runner := range runners {
|
|
||||||
if runner.GetName() == name {
|
|
||||||
// Sometimes a runner can stuck "busy" even though it is already "offline".
|
|
||||||
// Thus removing the condition on status can block the runner pod from being terminated forever.
|
|
||||||
busy = runner.GetBusy()
|
|
||||||
if runner.GetStatus() != "offline" && busy {
|
|
||||||
r.Log.Info("This runner will delay the runner pod deletion and the runner deregistration until it becomes either offline or non-busy", "name", runner.GetName(), "status", runner.GetStatus(), "busy", runner.GetBusy())
|
|
||||||
return false, fmt.Errorf("runner is busy")
|
|
||||||
}
|
|
||||||
id = runner.GetID()
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == int64(0) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sometimes a runner can stuck "busy" even though it is already "offline".
|
|
||||||
// Trying to remove the offline but busy runner can result in errors like the following:
|
|
||||||
// failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
|
|
||||||
if !busy {
|
|
||||||
if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
|||||||
600
controllers/runner_pod_owner.go
Normal file
600
controllers/runner_pod_owner.go
Normal file
@@ -0,0 +1,600 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
|
"github.com/go-logr/logr"
|
||||||
|
appsv1 "k8s.io/api/apps/v1"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/api/errors"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
type podsForOwner struct {
|
||||||
|
total int
|
||||||
|
completed int
|
||||||
|
running int
|
||||||
|
terminating int
|
||||||
|
regTimeout int
|
||||||
|
pending int
|
||||||
|
templateHash string
|
||||||
|
runner *v1alpha1.Runner
|
||||||
|
statefulSet *appsv1.StatefulSet
|
||||||
|
owner owner
|
||||||
|
object client.Object
|
||||||
|
synced bool
|
||||||
|
pods []corev1.Pod
|
||||||
|
}
|
||||||
|
|
||||||
|
type owner interface {
|
||||||
|
client.Object
|
||||||
|
|
||||||
|
pods(context.Context, client.Client) ([]corev1.Pod, error)
|
||||||
|
templateHash() (string, bool)
|
||||||
|
withAnnotation(k, v string) client.Object
|
||||||
|
synced() bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type ownerRunner struct {
|
||||||
|
client.Object
|
||||||
|
|
||||||
|
Log logr.Logger
|
||||||
|
Runner *v1alpha1.Runner
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ owner = (*ownerRunner)(nil)
|
||||||
|
|
||||||
|
func (r *ownerRunner) pods(ctx context.Context, c client.Client) ([]corev1.Pod, error) {
|
||||||
|
var pod corev1.Pod
|
||||||
|
|
||||||
|
if err := c.Get(ctx, types.NamespacedName{Namespace: r.Runner.Namespace, Name: r.Runner.Name}, &pod); err != nil {
|
||||||
|
if errors.IsNotFound(err) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
r.Log.Error(err, "Failed to get pod managed by runner")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return []corev1.Pod{pod}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ownerRunner) templateHash() (string, bool) {
|
||||||
|
return getRunnerTemplateHash(r.Runner)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ownerRunner) withAnnotation(k, v string) client.Object {
|
||||||
|
copy := r.Runner.DeepCopy()
|
||||||
|
setAnnotation(©.ObjectMeta, k, v)
|
||||||
|
return copy
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ownerRunner) synced() bool {
|
||||||
|
return r.Runner.Status.Phase != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
type ownerStatefulSet struct {
|
||||||
|
client.Object
|
||||||
|
|
||||||
|
Log logr.Logger
|
||||||
|
StatefulSet *appsv1.StatefulSet
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ owner = (*ownerStatefulSet)(nil)
|
||||||
|
|
||||||
|
func (s *ownerStatefulSet) pods(ctx context.Context, c client.Client) ([]corev1.Pod, error) {
|
||||||
|
var podList corev1.PodList
|
||||||
|
|
||||||
|
if err := c.List(ctx, &podList, client.MatchingLabels(s.StatefulSet.Spec.Template.ObjectMeta.Labels)); err != nil {
|
||||||
|
s.Log.Error(err, "Failed to list pods managed by statefulset")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var pods []corev1.Pod
|
||||||
|
|
||||||
|
for _, pod := range podList.Items {
|
||||||
|
if owner := metav1.GetControllerOf(&pod); owner == nil || owner.Kind != "StatefulSet" || owner.Name != s.StatefulSet.Name {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
pods = append(pods, pod)
|
||||||
|
}
|
||||||
|
|
||||||
|
return pods, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ownerStatefulSet) templateHash() (string, bool) {
|
||||||
|
return getRunnerTemplateHash(s.StatefulSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ownerStatefulSet) withAnnotation(k, v string) client.Object {
|
||||||
|
copy := s.StatefulSet.DeepCopy()
|
||||||
|
setAnnotation(©.ObjectMeta, k, v)
|
||||||
|
return copy
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ownerStatefulSet) synced() bool {
|
||||||
|
var replicas int32 = 1
|
||||||
|
if s.StatefulSet.Spec.Replicas != nil {
|
||||||
|
replicas = *s.StatefulSet.Spec.Replicas
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.StatefulSet.Status.Replicas != replicas {
|
||||||
|
s.Log.V(2).Info("Waiting for statefulset to sync", "desiredReplicas", replicas, "currentReplicas", s.StatefulSet.Status.Replicas)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPodsForOwner(ctx context.Context, c client.Client, log logr.Logger, o client.Object) (*podsForOwner, error) {
|
||||||
|
var (
|
||||||
|
owner owner
|
||||||
|
runner *v1alpha1.Runner
|
||||||
|
statefulSet *appsv1.StatefulSet
|
||||||
|
object client.Object
|
||||||
|
)
|
||||||
|
|
||||||
|
switch v := o.(type) {
|
||||||
|
case *v1alpha1.Runner:
|
||||||
|
owner = &ownerRunner{
|
||||||
|
Log: log,
|
||||||
|
Runner: v,
|
||||||
|
Object: v,
|
||||||
|
}
|
||||||
|
runner = v
|
||||||
|
object = v
|
||||||
|
case *appsv1.StatefulSet:
|
||||||
|
owner = &ownerStatefulSet{
|
||||||
|
Log: log,
|
||||||
|
StatefulSet: v,
|
||||||
|
Object: v,
|
||||||
|
}
|
||||||
|
statefulSet = v
|
||||||
|
object = v
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("BUG: Unsupported runner pods owner %v(%T)", v, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
pods, err := owner.pods(ctx, c)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var completed, running, terminating, regTimeout, pending, total int
|
||||||
|
|
||||||
|
for _, pod := range pods {
|
||||||
|
total++
|
||||||
|
|
||||||
|
if runnerPodOrContainerIsStopped(&pod) {
|
||||||
|
completed++
|
||||||
|
} else if pod.Status.Phase == corev1.PodRunning {
|
||||||
|
if podRunnerID(&pod) == "" && podConditionTransitionTimeAfter(&pod, corev1.PodReady, registrationTimeout) {
|
||||||
|
log.Info(
|
||||||
|
"Runner failed to register itself to GitHub in timely manner. "+
|
||||||
|
"Recreating the pod to see if it resolves the issue. "+
|
||||||
|
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
||||||
|
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
|
||||||
|
"creationTimestamp", pod.CreationTimestamp,
|
||||||
|
"readyTransitionTime", podConditionTransitionTime(&pod, corev1.PodReady, corev1.ConditionTrue),
|
||||||
|
"configuredRegistrationTimeout", registrationTimeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
regTimeout++
|
||||||
|
} else {
|
||||||
|
running++
|
||||||
|
}
|
||||||
|
} else if !pod.DeletionTimestamp.IsZero() {
|
||||||
|
terminating++
|
||||||
|
} else {
|
||||||
|
// pending includes running but timedout runner's pod too
|
||||||
|
pending++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
templateHash, ok := owner.templateHash()
|
||||||
|
if !ok {
|
||||||
|
log.Info("Failed to get template hash of statefulset. It must be in an invalid state. Please manually delete the statefulset so that it is recreated")
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
synced := owner.synced()
|
||||||
|
|
||||||
|
return &podsForOwner{
|
||||||
|
total: total,
|
||||||
|
completed: completed,
|
||||||
|
running: running,
|
||||||
|
terminating: terminating,
|
||||||
|
regTimeout: regTimeout,
|
||||||
|
pending: pending,
|
||||||
|
templateHash: templateHash,
|
||||||
|
runner: runner,
|
||||||
|
statefulSet: statefulSet,
|
||||||
|
owner: owner,
|
||||||
|
object: object,
|
||||||
|
synced: synced,
|
||||||
|
pods: pods,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getRunnerTemplateHash(r client.Object) (string, bool) {
|
||||||
|
hash, ok := r.GetLabels()[LabelKeyRunnerTemplateHash]
|
||||||
|
|
||||||
|
return hash, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
type state struct {
|
||||||
|
podsForOwners map[string][]*podsForOwner
|
||||||
|
lastSyncTime *time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
type result struct {
|
||||||
|
currentObjects []*podsForOwner
|
||||||
|
}
|
||||||
|
|
||||||
|
// Why `create` must be a function rather than a client.Object? That's becase we use it to create one or more objects on scale up.
|
||||||
|
//
|
||||||
|
// We use client.Create to create a necessary number of client.Object. client.Create mutates the passed object on a successful creation.
|
||||||
|
// It seems to set .Revision at least, and the existence of .Revision let client.Create fail due to K8s restriction that an object being just created
|
||||||
|
// can't have .Revision.
|
||||||
|
// Now, imagine that you are to add 2 runner replicas on scale up.
|
||||||
|
// We create one resource object per a replica that ends up calling 2 client.Create calls.
|
||||||
|
// If we were reusing client.Object to be passed to client.Create calls, only the first call suceeeds.
|
||||||
|
// The second call fails due to the first call mutated the client.Object to have .Revision.
|
||||||
|
// Passing a factory function of client.Object and creating a brand-new client.Object per a client.Create call resolves this issue,
|
||||||
|
// allowing us to create two or more replicas in one reconcilation loop without being rejected by K8s.
|
||||||
|
func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger, effectiveTime *metav1.Time, newDesiredReplicas int, create func() client.Object, ephemeral bool, owners []client.Object) (*result, error) {
|
||||||
|
state, err := collectPodsForOwners(ctx, c, log, owners)
|
||||||
|
if err != nil || state == nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
podsForOwnersPerTemplateHash, lastSyncTime := state.podsForOwners, state.lastSyncTime
|
||||||
|
|
||||||
|
// # Why do we recreate statefulsets instead of updating their desired replicas?
|
||||||
|
//
|
||||||
|
// A statefulset cannot add more pods when not all the pods are running.
|
||||||
|
// Our ephemeral runners' pods that have finished running become Completed(Phase=Succeeded).
|
||||||
|
// So creating one statefulset per a batch of ephemeral runners is the only way for us to add more replicas.
|
||||||
|
//
|
||||||
|
// # Why do we recreate statefulsets instead of updating fields other than replicas?
|
||||||
|
//
|
||||||
|
// That's because Kubernetes doesn't allow updating anything other than replicas, template, and updateStrategy.
|
||||||
|
// And the nature of ephemeral runner pods requires you to create a statefulset per a batch of new runner pods so
|
||||||
|
// we have really no other choice.
|
||||||
|
//
|
||||||
|
// If you're curious, the below is the error message you will get when you tried to update forbidden StatefulSet field(s):
|
||||||
|
//
|
||||||
|
// 2021-06-13T07:19:52.760Z ERROR actions-runner-controller.runnerset Failed to patch statefulset
|
||||||
|
// {"runnerset": "default/example-runnerset", "error": "StatefulSet.apps \"example-runnerset\" is invalid: s
|
||||||
|
// pec: Forbidden: updates to statefulset spec for fields other than 'replicas', 'template', and 'updateStrategy'
|
||||||
|
// are forbidden"}
|
||||||
|
//
|
||||||
|
// Even though the error message includes "Forbidden", this error's reason is "Invalid".
|
||||||
|
// So we used to match these errors by using errors.IsInvalid. But that's another story...
|
||||||
|
|
||||||
|
desiredTemplateHash, ok := getRunnerTemplateHash(create())
|
||||||
|
if !ok {
|
||||||
|
log.Info("Failed to get template hash of desired owner resource. It must be in an invalid state. Please manually delete the owner so that it is recreated")
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
currentObjects := podsForOwnersPerTemplateHash[desiredTemplateHash]
|
||||||
|
|
||||||
|
sort.SliceStable(currentObjects, func(i, j int) bool {
|
||||||
|
return currentObjects[i].owner.GetCreationTimestamp().Time.Before(currentObjects[j].owner.GetCreationTimestamp().Time)
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(currentObjects) > 0 {
|
||||||
|
timestampFirst := currentObjects[0].owner.GetCreationTimestamp()
|
||||||
|
timestampLast := currentObjects[len(currentObjects)-1].owner.GetCreationTimestamp()
|
||||||
|
var names []string
|
||||||
|
for _, ss := range currentObjects {
|
||||||
|
names = append(names, ss.owner.GetName())
|
||||||
|
}
|
||||||
|
log.V(2).Info("Detected some current object(s)", "creationTimestampFirst", timestampFirst, "creationTimestampLast", timestampLast, "names", names)
|
||||||
|
}
|
||||||
|
|
||||||
|
var total, terminating, pending, running, regTimeout int
|
||||||
|
|
||||||
|
for _, ss := range currentObjects {
|
||||||
|
total += ss.total
|
||||||
|
terminating += ss.terminating
|
||||||
|
pending += ss.pending
|
||||||
|
running += ss.running
|
||||||
|
regTimeout += ss.regTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
numOwners := len(owners)
|
||||||
|
|
||||||
|
var hashes []string
|
||||||
|
for h, _ := range state.podsForOwners {
|
||||||
|
hashes = append(hashes, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info(
|
||||||
|
"Found some pods across owner(s)",
|
||||||
|
"total", total,
|
||||||
|
"terminating", terminating,
|
||||||
|
"pending", pending,
|
||||||
|
"running", running,
|
||||||
|
"regTimeout", regTimeout,
|
||||||
|
"desired", newDesiredReplicas,
|
||||||
|
"owners", numOwners,
|
||||||
|
)
|
||||||
|
|
||||||
|
maybeRunning := pending + running
|
||||||
|
|
||||||
|
wantMoreRunners := newDesiredReplicas > maybeRunning
|
||||||
|
alreadySyncedAfterEffectiveTime := ephemeral && lastSyncTime != nil && effectiveTime != nil && lastSyncTime.After(effectiveTime.Time)
|
||||||
|
runnerPodRecreationDelayAfterWebhookScale := lastSyncTime != nil && time.Now().Before(lastSyncTime.Add(DefaultRunnerPodRecreationDelayAfterWebhookScale))
|
||||||
|
|
||||||
|
log = log.WithValues(
|
||||||
|
"lastSyncTime", lastSyncTime,
|
||||||
|
"effectiveTime", effectiveTime,
|
||||||
|
"templateHashDesired", desiredTemplateHash,
|
||||||
|
"replicasDesired", newDesiredReplicas,
|
||||||
|
"replicasPending", pending,
|
||||||
|
"replicasRunning", running,
|
||||||
|
"replicasMaybeRunning", maybeRunning,
|
||||||
|
"templateHashObserved", hashes,
|
||||||
|
)
|
||||||
|
|
||||||
|
if wantMoreRunners && alreadySyncedAfterEffectiveTime && runnerPodRecreationDelayAfterWebhookScale {
|
||||||
|
// This is our special handling of the situation for ephemeral runners only.
|
||||||
|
//
|
||||||
|
// Handling static runners this way results in scale-up to not work at all,
|
||||||
|
// because then any scale up attempts for static runenrs fall within this condition, for two reasons.
|
||||||
|
// First, static(persistent) runners will never restart on their own.
|
||||||
|
// Second, we don't update EffectiveTime for static runners.
|
||||||
|
//
|
||||||
|
// We do need to skip this condition for static runners, and that's why we take the `ephemeral` flag into account when
|
||||||
|
// computing `alreadySyncedAfterEffectiveTime``.
|
||||||
|
|
||||||
|
log.V(2).Info(
|
||||||
|
"Detected that some ephemeral runners have disappeared. " +
|
||||||
|
"Usually this is due to that ephemeral runner completions " +
|
||||||
|
"so ARC does not create new runners until EffectiveTime is updated, or DefaultRunnerPodRecreationDelayAfterWebhookScale is elapsed.")
|
||||||
|
} else if wantMoreRunners {
|
||||||
|
if alreadySyncedAfterEffectiveTime && !runnerPodRecreationDelayAfterWebhookScale {
|
||||||
|
log.V(2).Info("Adding more replicas because DefaultRunnerPodRecreationDelayAfterWebhookScale has been passed")
|
||||||
|
}
|
||||||
|
|
||||||
|
num := newDesiredReplicas - maybeRunning
|
||||||
|
|
||||||
|
for i := 0; i < num; i++ {
|
||||||
|
// Add more replicas
|
||||||
|
if err := c.Create(ctx, create()); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(1).Info("Created replica(s)",
|
||||||
|
"created", num,
|
||||||
|
)
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
} else if newDesiredReplicas <= running {
|
||||||
|
// If you use ephemeral runners with webhook-based autoscaler and the runner controller is working normally,
|
||||||
|
// you're unlikely to fall into this branch.
|
||||||
|
//
|
||||||
|
// That's because all the stakeholders work like this:
|
||||||
|
//
|
||||||
|
// 1. A runner pod completes with the runner container exiting with code 0
|
||||||
|
// 2. ARC runner controller detects the pod completion, marks the owner(runner or statefulset) resource on k8s for deletion (=Runner.DeletionTimestamp becomes non-zero)
|
||||||
|
// 3. GitHub triggers a corresponding workflow_job "complete" webhook event
|
||||||
|
// 4. ARC github-webhook-server (webhook-based autoscaler) receives the webhook event updates HRA with removing the oldest capacity reservation
|
||||||
|
// 5. ARC horizontalrunnerautoscaler updates RunnerDeployment's desired replicas based on capacity reservations
|
||||||
|
// 6. ARC runnerdeployment controller updates RunnerReplicaSet's desired replicas
|
||||||
|
// 7. (We're here) ARC runnerset or runnerreplicaset controller starts reconciling the owner resource (statefulset or runner)
|
||||||
|
//
|
||||||
|
// In a normally working ARC installation, the runner that was used to run the workflow job should already have been
|
||||||
|
// marked for deletion by the runner controller.
|
||||||
|
// This runnerreplicaset controller doesn't count marked runners into the `running` value, hence you're unlikely to
|
||||||
|
// fall into this branch when you're using ephemeral runners with webhook-based-autoscaler.
|
||||||
|
|
||||||
|
var retained int
|
||||||
|
|
||||||
|
var delete []*podsForOwner
|
||||||
|
for i := len(currentObjects) - 1; i >= 0; i-- {
|
||||||
|
ss := currentObjects[i]
|
||||||
|
|
||||||
|
if ss.running == 0 || retained >= newDesiredReplicas {
|
||||||
|
// In case the desired replicas is satisfied until i-1, or this owner has no running pods,
|
||||||
|
// this owner can be considered safe for deletion.
|
||||||
|
// Note that we already waited on this owner to create pods by waiting for
|
||||||
|
// `.Status.Replicas`(=total number of pods managed by owner, regardless of the runner is Running or Completed) to match the desired replicas in a previous step.
|
||||||
|
// So `.running == 0` means "the owner has created the desired number of pods before, and all of them are completed now".
|
||||||
|
delete = append(delete, ss)
|
||||||
|
} else if retained < newDesiredReplicas {
|
||||||
|
retained += ss.running
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if retained == newDesiredReplicas {
|
||||||
|
for _, ss := range delete {
|
||||||
|
log := log.WithValues("owner", types.NamespacedName{Namespace: ss.owner.GetNamespace(), Name: ss.owner.GetName()})
|
||||||
|
// Statefulset termination process 1/4: Set unregistrationRequestTimestamp only after all the pods managed by the statefulset have
|
||||||
|
// started unregistreation process.
|
||||||
|
//
|
||||||
|
// NOTE: We just mark it instead of immediately starting the deletion process.
|
||||||
|
// Otherwise, the runner pod may hit termiationGracePeriod before the unregistration completes(the max terminationGracePeriod is limited to 1h by K8s and a job can be run for more than that),
|
||||||
|
// or actions/runner may potentially misbehave on SIGTERM immediately sent by K8s.
|
||||||
|
// We'd better unregister first and then start a pod deletion process.
|
||||||
|
// The annotation works as a mark to start the pod unregistration and deletion process of ours.
|
||||||
|
|
||||||
|
if _, ok := getAnnotation(ss.owner, AnnotationKeyUnregistrationRequestTimestamp); ok {
|
||||||
|
log.V(2).Info("Still waiting for runner pod(s) unregistration to complete")
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, po := range ss.pods {
|
||||||
|
if _, err := annotatePodOnce(ctx, c, log, &po, AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
updated := ss.owner.withAnnotation(AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
if err := c.Patch(ctx, updated, client.MergeFrom(ss.owner)); err != nil {
|
||||||
|
log.Error(err, fmt.Sprintf("Failed to patch owner to have %s annotation", AnnotationKeyUnregistrationRequestTimestamp))
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Redundant owner has been annotated to start the unregistration before deletion")
|
||||||
|
}
|
||||||
|
} else if retained > newDesiredReplicas {
|
||||||
|
log.V(2).Info("Waiting sync before scale down", "retained", retained, "newDesiredReplicas", newDesiredReplicas)
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
} else {
|
||||||
|
log.Info("Invalid state", "retained", retained, "newDesiredReplicas", newDesiredReplicas)
|
||||||
|
panic("crashed due to invalid state")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sss := range podsForOwnersPerTemplateHash {
|
||||||
|
for _, ss := range sss {
|
||||||
|
if ss.templateHash != desiredTemplateHash {
|
||||||
|
if ss.owner.GetDeletionTimestamp().IsZero() {
|
||||||
|
if err := c.Delete(ctx, ss.object); err != nil {
|
||||||
|
log.Error(err, "Unable to delete object")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Deleted redundant and outdated object")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &result{
|
||||||
|
currentObjects: currentObjects,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger, owners []client.Object) (*state, error) {
|
||||||
|
podsForOwnerPerTemplateHash := map[string][]*podsForOwner{}
|
||||||
|
|
||||||
|
// lastSyncTime becomes non-nil only when there are one or more owner(s) hence there are same number of runner pods.
|
||||||
|
// It's used to prevent runnerset-controller from recreating "completed ephemeral runners".
|
||||||
|
// This is needed to prevent runners from being terminated prematurely.
|
||||||
|
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/911 for more context.
|
||||||
|
//
|
||||||
|
// This becomes nil when there are zero statefulset(s). That's fine because then there should be zero stateful(s) to be recreated either hence
|
||||||
|
// we don't need to guard with lastSyncTime.
|
||||||
|
var lastSyncTime *time.Time
|
||||||
|
|
||||||
|
for _, ss := range owners {
|
||||||
|
log := log.WithValues("owner", types.NamespacedName{Namespace: ss.GetNamespace(), Name: ss.GetName()})
|
||||||
|
|
||||||
|
res, err := getPodsForOwner(ctx, c, log, ss)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.templateHash == "" {
|
||||||
|
log.Info("validation error: runner pod owner must have template hash", "object", res.object)
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Statefulset termination process 4/4: Let Kubernetes cascade-delete the statefulset and the pods.
|
||||||
|
//
|
||||||
|
// If the runner is already marked for deletion(=has a non-zero deletion timestamp) by the runner controller (can be caused by an ephemeral runner completion)
|
||||||
|
// or by this controller (in case it was deleted in the previous reconcilation loop),
|
||||||
|
// we don't need to bother calling GitHub API to re-mark the runner for deletion.
|
||||||
|
// Just hold on, and runners will disappear as long as the runner controller is up and running.
|
||||||
|
if !res.owner.GetDeletionTimestamp().IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Statefulset termination process 3/4: Set the deletionTimestamp to let Kubernetes start a cascade deletion of the statefulset and the pods.
|
||||||
|
if _, ok := getAnnotation(res.owner, AnnotationKeyUnregistrationCompleteTimestamp); ok {
|
||||||
|
if err := c.Delete(ctx, res.object); err != nil {
|
||||||
|
log.Error(err, "Failed to delete owner")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Started deletion of owner")
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Statefulset termination process 2/4: Set unregistrationCompleteTimestamp only if all the pods managed by the statefulset
|
||||||
|
// have either unregistered or being deleted.
|
||||||
|
if _, ok := getAnnotation(res.owner, AnnotationKeyUnregistrationRequestTimestamp); ok {
|
||||||
|
var deletionSafe int
|
||||||
|
for _, po := range res.pods {
|
||||||
|
if _, ok := getAnnotation(&po, AnnotationKeyUnregistrationCompleteTimestamp); ok {
|
||||||
|
deletionSafe++
|
||||||
|
} else if !po.DeletionTimestamp.IsZero() {
|
||||||
|
deletionSafe++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if deletionSafe == res.total {
|
||||||
|
log.V(2).Info("Marking owner for unregistration completion", "deletionSafe", deletionSafe, "total", res.total)
|
||||||
|
|
||||||
|
if _, ok := getAnnotation(res.owner, AnnotationKeyUnregistrationCompleteTimestamp); !ok {
|
||||||
|
updated := res.owner.withAnnotation(AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
|
||||||
|
if err := c.Patch(ctx, updated, client.MergeFrom(res.owner)); err != nil {
|
||||||
|
log.Error(err, fmt.Sprintf("Failed to patch owner to have %s annotation", AnnotationKeyUnregistrationCompleteTimestamp))
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Redundant owner has been annotated to start the deletion")
|
||||||
|
} else {
|
||||||
|
log.V(2).Info("BUG: Redundant owner was already annotated to start the deletion")
|
||||||
|
}
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if annotations := res.owner.GetAnnotations(); annotations != nil {
|
||||||
|
if a, ok := annotations[SyncTimeAnnotationKey]; ok {
|
||||||
|
t, err := time.Parse(time.RFC3339, a)
|
||||||
|
if err == nil {
|
||||||
|
if lastSyncTime == nil || lastSyncTime.Before(t) {
|
||||||
|
lastSyncTime = &t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A completed owner and a completed runner pod can safely be deleted without
|
||||||
|
// a race condition so delete it here,
|
||||||
|
// so that the later process can be a bit simpler.
|
||||||
|
if res.total > 0 && res.total == res.completed {
|
||||||
|
if err := c.Delete(ctx, ss); err != nil {
|
||||||
|
log.Error(err, "Unable to delete owner")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Deleted completed owner")
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !res.synced {
|
||||||
|
log.V(1).Info("Skipped reconcilation because owner is not synced yet", "pods", res.pods)
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
podsForOwnerPerTemplateHash[res.templateHash] = append(podsForOwnerPerTemplateHash[res.templateHash], res)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &state{podsForOwnerPerTemplateHash, lastSyncTime}, nil
|
||||||
|
}
|
||||||
@@ -118,6 +118,8 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Info("Created runnerreplicaset", "runnerreplicaset", desiredRS.Name)
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -142,6 +144,8 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Info("Created runnerreplicaset", "runnerreplicaset", desiredRS.Name)
|
||||||
|
|
||||||
// We requeue in order to clean up old runner replica sets later.
|
// We requeue in order to clean up old runner replica sets later.
|
||||||
// Otherwise, they aren't cleaned up until the next re-sync interval.
|
// Otherwise, they aren't cleaned up until the next re-sync interval.
|
||||||
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
|
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
|
||||||
@@ -177,6 +181,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
||||||
if currentDesiredReplicas != newDesiredReplicas {
|
if currentDesiredReplicas != newDesiredReplicas {
|
||||||
newestSet.Spec.Replicas = &newDesiredReplicas
|
newestSet.Spec.Replicas = &newDesiredReplicas
|
||||||
|
newestSet.Spec.EffectiveTime = rd.Spec.EffectiveTime
|
||||||
|
|
||||||
if err := r.Client.Update(ctx, newestSet); err != nil {
|
if err := r.Client.Update(ctx, newestSet); err != nil {
|
||||||
log.Error(err, "Failed to update runnerreplicaset resource")
|
log.Error(err, "Failed to update runnerreplicaset resource")
|
||||||
@@ -221,15 +226,38 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
for i := range oldSets {
|
for i := range oldSets {
|
||||||
rs := oldSets[i]
|
rs := oldSets[i]
|
||||||
|
|
||||||
|
rslog := log.WithValues("runnerreplicaset", rs.Name)
|
||||||
|
|
||||||
|
if rs.Status.Replicas != nil && *rs.Status.Replicas > 0 {
|
||||||
|
if rs.Spec.Replicas != nil && *rs.Spec.Replicas == 0 {
|
||||||
|
rslog.V(2).Info("Waiting for runnerreplicaset to scale to zero")
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
updated := rs.DeepCopy()
|
||||||
|
zero := 0
|
||||||
|
updated.Spec.Replicas = &zero
|
||||||
|
if err := r.Client.Update(ctx, updated); err != nil {
|
||||||
|
rslog.Error(err, "Failed to scale runnerreplicaset to zero")
|
||||||
|
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
rslog.Info("Scaled runnerreplicaset to zero")
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if err := r.Client.Delete(ctx, &rs); err != nil {
|
if err := r.Client.Delete(ctx, &rs); err != nil {
|
||||||
log.Error(err, "Failed to delete runnerreplicaset resource")
|
rslog.Error(err, "Failed to delete runnerreplicaset resource")
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name))
|
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name))
|
||||||
|
|
||||||
log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name)
|
rslog.Info("Deleted runnerreplicaset")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -393,9 +421,7 @@ func getSelector(rd *v1alpha1.RunnerDeployment) *metav1.LabelSelector {
|
|||||||
func newRunnerReplicaSet(rd *v1alpha1.RunnerDeployment, commonRunnerLabels []string, scheme *runtime.Scheme) (*v1alpha1.RunnerReplicaSet, error) {
|
func newRunnerReplicaSet(rd *v1alpha1.RunnerDeployment, commonRunnerLabels []string, scheme *runtime.Scheme) (*v1alpha1.RunnerReplicaSet, error) {
|
||||||
newRSTemplate := *rd.Spec.Template.DeepCopy()
|
newRSTemplate := *rd.Spec.Template.DeepCopy()
|
||||||
|
|
||||||
for _, l := range commonRunnerLabels {
|
newRSTemplate.Spec.Labels = append(newRSTemplate.Spec.Labels, commonRunnerLabels...)
|
||||||
newRSTemplate.Spec.Labels = append(newRSTemplate.Spec.Labels, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
templateHash := ComputeHash(&newRSTemplate)
|
templateHash := ComputeHash(&newRSTemplate)
|
||||||
|
|
||||||
@@ -417,9 +443,10 @@ func newRunnerReplicaSet(rd *v1alpha1.RunnerDeployment, commonRunnerLabels []str
|
|||||||
Labels: newRSTemplate.ObjectMeta.Labels,
|
Labels: newRSTemplate.ObjectMeta.Labels,
|
||||||
},
|
},
|
||||||
Spec: v1alpha1.RunnerReplicaSetSpec{
|
Spec: v1alpha1.RunnerReplicaSetSpec{
|
||||||
Replicas: rd.Spec.Replicas,
|
Replicas: rd.Spec.Replicas,
|
||||||
Selector: newRSSelector,
|
Selector: newRSSelector,
|
||||||
Template: newRSTemplate,
|
Template: newRSTemplate,
|
||||||
|
EffectiveTime: rd.Spec.EffectiveTime,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,13 +18,10 @@ package controllers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"reflect"
|
"reflect"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
gogithub "github.com/google/go-github/v39/github"
|
|
||||||
|
|
||||||
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
@@ -32,7 +29,6 @@ import (
|
|||||||
ctrl "sigs.k8s.io/controller-runtime"
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
|
||||||
corev1 "k8s.io/api/core/v1"
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
@@ -49,6 +45,10 @@ type RunnerReplicaSetReconciler struct {
|
|||||||
Name string
|
Name string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
SyncTimeAnnotationKey = "sync-time"
|
||||||
|
)
|
||||||
|
|
||||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
|
||||||
@@ -65,18 +65,42 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !rs.ObjectMeta.DeletionTimestamp.IsZero() {
|
if !rs.ObjectMeta.DeletionTimestamp.IsZero() {
|
||||||
|
// RunnerReplicaSet cannot be gracefuly removed.
|
||||||
|
// That means any runner that is running a job can be prematurely terminated.
|
||||||
|
// To gracefully remove a RunnerReplicaSet, scale it down to zero first, observe RunnerReplicaSet's status replicas,
|
||||||
|
// and remove it only after the status replicas becomes zero.
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if rs.ObjectMeta.Labels == nil {
|
||||||
|
rs.ObjectMeta.Labels = map[string]string{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Template hash is usually set by the upstream controller(RunnerDeplloyment controller) on authoring
|
||||||
|
// RunerReplicaset resource, but it may be missing when the user directly created RunnerReplicaSet.
|
||||||
|
// As a template hash is required by by the runner replica management, we dynamically add it here without ever persisting it.
|
||||||
|
if rs.ObjectMeta.Labels[LabelKeyRunnerTemplateHash] == "" {
|
||||||
|
template := rs.Spec.DeepCopy()
|
||||||
|
template.Replicas = nil
|
||||||
|
template.EffectiveTime = nil
|
||||||
|
templateHash := ComputeHash(template)
|
||||||
|
|
||||||
|
log.Info("Using auto-generated template hash", "value", templateHash)
|
||||||
|
|
||||||
|
rs.ObjectMeta.Labels = CloneAndAddLabel(rs.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash)
|
||||||
|
rs.Spec.Template.ObjectMeta.Labels = CloneAndAddLabel(rs.Spec.Template.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash)
|
||||||
|
}
|
||||||
|
|
||||||
selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
|
selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the Runners managed by the target RunnerReplicaSet
|
// Get the Runners managed by the target RunnerReplicaSet
|
||||||
var allRunners v1alpha1.RunnerList
|
var runnerList v1alpha1.RunnerList
|
||||||
if err := r.List(
|
if err := r.List(
|
||||||
ctx,
|
ctx,
|
||||||
&allRunners,
|
&runnerList,
|
||||||
client.InNamespace(req.Namespace),
|
client.InNamespace(req.Namespace),
|
||||||
client.MatchingLabelsSelector{Selector: selector},
|
client.MatchingLabelsSelector{Selector: selector},
|
||||||
); err != nil {
|
); err != nil {
|
||||||
@@ -85,179 +109,44 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var myRunners []v1alpha1.Runner
|
replicas := 1
|
||||||
|
if rs.Spec.Replicas != nil {
|
||||||
|
replicas = *rs.Spec.Replicas
|
||||||
|
}
|
||||||
|
|
||||||
|
effectiveTime := rs.Spec.EffectiveTime
|
||||||
|
ephemeral := rs.Spec.Template.Spec.Ephemeral == nil || *rs.Spec.Template.Spec.Ephemeral
|
||||||
|
|
||||||
|
desired, err := r.newRunner(rs)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err, "Could not create runner")
|
||||||
|
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var live []client.Object
|
||||||
|
for _, r := range runnerList.Items {
|
||||||
|
r := r
|
||||||
|
live = append(live, &r)
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, replicas, func() client.Object { return desired.DeepCopy() }, ephemeral, live)
|
||||||
|
if err != nil || res == nil {
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
current int
|
status v1alpha1.RunnerReplicaSetStatus
|
||||||
ready int
|
|
||||||
available int
|
current, available, ready int
|
||||||
)
|
)
|
||||||
|
|
||||||
for _, r := range allRunners.Items {
|
for _, o := range res.currentObjects {
|
||||||
// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
|
current += o.total
|
||||||
// to not treat all the runners in the namespace as its children.
|
available += o.running
|
||||||
if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) {
|
ready += o.running
|
||||||
myRunners = append(myRunners, r)
|
|
||||||
|
|
||||||
current += 1
|
|
||||||
|
|
||||||
if r.Status.Phase == string(corev1.PodRunning) {
|
|
||||||
ready += 1
|
|
||||||
// available is currently the same as ready, as we don't yet have minReadySeconds for runners
|
|
||||||
available += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var desired int
|
|
||||||
|
|
||||||
if rs.Spec.Replicas != nil {
|
|
||||||
desired = *rs.Spec.Replicas
|
|
||||||
} else {
|
|
||||||
desired = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: remove this registration runner cleanup later (v0.23.0 or v0.24.0)
|
|
||||||
//
|
|
||||||
// We had to have a registration-only runner to support scale-from-zero before.
|
|
||||||
// But since Sep 2021 Actions update on GitHub Cloud and GHES 3.3, it is unneceesary.
|
|
||||||
// See the below issues for more contexts:
|
|
||||||
// https://github.com/actions-runner-controller/actions-runner-controller/issues/516
|
|
||||||
// https://github.com/actions-runner-controller/actions-runner-controller/issues/859
|
|
||||||
//
|
|
||||||
// In the below block, we have a logic to remove existing registration-only runners as unnecessary.
|
|
||||||
// This logic is introduced since actions-runner-controller 0.21.0 and probably last one or two minor releases
|
|
||||||
// so that actions-runner-controller instance in everyone's cluster won't leave dangling registration-only runners.
|
|
||||||
registrationOnlyRunnerNsName := req.NamespacedName
|
|
||||||
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
|
|
||||||
registrationOnlyRunner := v1alpha1.Runner{}
|
|
||||||
registrationOnlyRunnerExists := false
|
|
||||||
if err := r.Get(
|
|
||||||
ctx,
|
|
||||||
registrationOnlyRunnerNsName,
|
|
||||||
®istrationOnlyRunner,
|
|
||||||
); err != nil {
|
|
||||||
if !kerrors.IsNotFound(err) {
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
registrationOnlyRunnerExists = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if registrationOnlyRunnerExists {
|
|
||||||
if err := r.Client.Delete(ctx, ®istrationOnlyRunner); err != nil {
|
|
||||||
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
|
|
||||||
|
|
||||||
return ctrl.Result{Requeue: true}, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if current > desired {
|
|
||||||
n := current - desired
|
|
||||||
|
|
||||||
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "current", current, "ready", ready)
|
|
||||||
|
|
||||||
// get runners that are currently offline/not busy/timed-out to register
|
|
||||||
var deletionCandidates []v1alpha1.Runner
|
|
||||||
|
|
||||||
for _, runner := range allRunners.Items {
|
|
||||||
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
|
||||||
if err != nil {
|
|
||||||
notRegistered := false
|
|
||||||
offline := false
|
|
||||||
|
|
||||||
var notFoundException *github.RunnerNotFound
|
|
||||||
var offlineException *github.RunnerOffline
|
|
||||||
if errors.As(err, ¬FoundException) {
|
|
||||||
log.V(1).Info("Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time.", "runnerName", runner.Name)
|
|
||||||
notRegistered = true
|
|
||||||
} else if errors.As(err, &offlineException) {
|
|
||||||
offline = true
|
|
||||||
} else {
|
|
||||||
var e *gogithub.RateLimitError
|
|
||||||
if errors.As(err, &e) {
|
|
||||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
|
||||||
// or the runner is still busy.
|
|
||||||
log.Error(
|
|
||||||
err,
|
|
||||||
fmt.Sprintf(
|
|
||||||
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
|
|
||||||
retryDelayOnGitHubAPIRateLimitError,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
registrationTimeout := 15 * time.Minute
|
|
||||||
currentTime := time.Now()
|
|
||||||
registrationDidTimeout := currentTime.Sub(runner.CreationTimestamp.Add(registrationTimeout)) > 0
|
|
||||||
|
|
||||||
if notRegistered && registrationDidTimeout {
|
|
||||||
log.Info(
|
|
||||||
"Runner failed to register itself to GitHub in timely manner. "+
|
|
||||||
"Marking the runner for scale down. "+
|
|
||||||
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
|
||||||
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
|
|
||||||
"runnerCreationTimestamp", runner.CreationTimestamp,
|
|
||||||
"currentTime", currentTime,
|
|
||||||
"configuredRegistrationTimeout", registrationTimeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
deletionCandidates = append(deletionCandidates, runner)
|
|
||||||
}
|
|
||||||
|
|
||||||
// offline runners should always be a great target for scale down
|
|
||||||
if offline {
|
|
||||||
deletionCandidates = append(deletionCandidates, runner)
|
|
||||||
}
|
|
||||||
} else if !busy {
|
|
||||||
deletionCandidates = append(deletionCandidates, runner)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(deletionCandidates) < n {
|
|
||||||
n = len(deletionCandidates)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.V(0).Info(fmt.Sprintf("Deleting %d runner(s)", n), "desired", desired, "current", current, "ready", ready)
|
|
||||||
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
|
|
||||||
log.Error(err, "Failed to delete runner resource")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
|
|
||||||
log.Info("Deleted runner")
|
|
||||||
}
|
|
||||||
} else if desired > current {
|
|
||||||
n := desired - current
|
|
||||||
|
|
||||||
log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", current, "ready", ready)
|
|
||||||
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
newRunner, err := r.newRunner(rs)
|
|
||||||
if err != nil {
|
|
||||||
log.Error(err, "Could not create runner")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := r.Client.Create(ctx, &newRunner); err != nil {
|
|
||||||
log.Error(err, "Failed to create runner resource")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var status v1alpha1.RunnerReplicaSetStatus
|
|
||||||
|
|
||||||
status.Replicas = ¤t
|
status.Replicas = ¤t
|
||||||
status.AvailableReplicas = &available
|
status.AvailableReplicas = &available
|
||||||
status.ReadyReplicas = &ready
|
status.ReadyReplicas = &ready
|
||||||
@@ -278,10 +167,16 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1alpha1.Runner, error) {
|
func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1alpha1.Runner, error) {
|
||||||
|
// Note that the upstream controller (runnerdeployment) is expected to add
|
||||||
|
// the "runner template hash" label to the template.meta which is necessary to make this controller work correctly
|
||||||
objectMeta := rs.Spec.Template.ObjectMeta.DeepCopy()
|
objectMeta := rs.Spec.Template.ObjectMeta.DeepCopy()
|
||||||
|
|
||||||
objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
|
objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
|
||||||
objectMeta.Namespace = rs.ObjectMeta.Namespace
|
objectMeta.Namespace = rs.ObjectMeta.Namespace
|
||||||
|
if objectMeta.Annotations == nil {
|
||||||
|
objectMeta.Annotations = map[string]string{}
|
||||||
|
}
|
||||||
|
objectMeta.Annotations[SyncTimeAnnotationKey] = time.Now().Format(time.RFC3339)
|
||||||
|
|
||||||
runner := v1alpha1.Runner{
|
runner := v1alpha1.Runner{
|
||||||
TypeMeta: metav1.TypeMeta{},
|
TypeMeta: metav1.TypeMeta{},
|
||||||
@@ -310,7 +205,3 @@ func (r *RunnerReplicaSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|||||||
Named(name).
|
Named(name).
|
||||||
Complete(r)
|
Complete(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
func registrationOnlyRunnerNameFor(rsName string) string {
|
|
||||||
return rsName + "-registration-only"
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
corev1 "k8s.io/api/core/v1"
|
corev1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
|
||||||
"k8s.io/client-go/kubernetes/scheme"
|
"k8s.io/client-go/kubernetes/scheme"
|
||||||
ctrl "sigs.k8s.io/controller-runtime"
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
logf "sigs.k8s.io/controller-runtime/pkg/log"
|
logf "sigs.k8s.io/controller-runtime/pkg/log"
|
||||||
@@ -102,12 +101,40 @@ func intPtr(v int) *int {
|
|||||||
var _ = Context("Inside of a new namespace", func() {
|
var _ = Context("Inside of a new namespace", func() {
|
||||||
ctx := context.TODO()
|
ctx := context.TODO()
|
||||||
ns := SetupTest(ctx)
|
ns := SetupTest(ctx)
|
||||||
|
name := "example-runnerreplicaset"
|
||||||
|
|
||||||
Describe("when no existing resources exist", func() {
|
getRunnerCount := func() int {
|
||||||
|
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
|
||||||
|
|
||||||
It("should create a new Runner resource from the specified template, add a another Runner on replicas increased, and removes all the replicas when set to 0", func() {
|
selector, err := metav1.LabelSelectorAsSelector(
|
||||||
name := "example-runnerreplicaset"
|
&metav1.LabelSelector{
|
||||||
|
MatchLabels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
logf.Log.Error(err, "failed to create labelselector")
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
err = k8sClient.List(
|
||||||
|
ctx,
|
||||||
|
&runners,
|
||||||
|
client.InNamespace(ns.Name),
|
||||||
|
client.MatchingLabelsSelector{Selector: selector},
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
logf.Log.Error(err, "list runners")
|
||||||
|
}
|
||||||
|
|
||||||
|
runnersList.Sync(runners.Items)
|
||||||
|
|
||||||
|
return len(runners.Items)
|
||||||
|
}
|
||||||
|
|
||||||
|
Describe("RunnerReplicaSet", func() {
|
||||||
|
It("should create a new Runner resource from the specified template", func() {
|
||||||
{
|
{
|
||||||
rs := &actionsv1alpha1.RunnerReplicaSet{
|
rs := &actionsv1alpha1.RunnerReplicaSet{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
@@ -146,126 +173,99 @@ var _ = Context("Inside of a new namespace", func() {
|
|||||||
|
|
||||||
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
|
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
|
||||||
|
|
||||||
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
|
|
||||||
|
|
||||||
Eventually(
|
Eventually(
|
||||||
func() int {
|
getRunnerCount,
|
||||||
selector, err := metav1.LabelSelectorAsSelector(
|
time.Second*5, time.Second).Should(BeEquivalentTo(1))
|
||||||
&metav1.LabelSelector{
|
|
||||||
MatchLabels: map[string]string{
|
|
||||||
"foo": "bar",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
logf.Log.Error(err, "failed to create labelselector")
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
err = k8sClient.List(
|
|
||||||
ctx,
|
|
||||||
&runners,
|
|
||||||
client.InNamespace(ns.Name),
|
|
||||||
client.MatchingLabelsSelector{Selector: selector},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
logf.Log.Error(err, "list runners")
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
runnersList.Sync(runners.Items)
|
|
||||||
|
|
||||||
return len(runners.Items)
|
|
||||||
},
|
|
||||||
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(1))
|
|
||||||
}
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should create 2 runners when specified 2 replicas", func() {
|
||||||
{
|
{
|
||||||
// We wrap the update in the Eventually block to avoid the below error that occurs due to concurrent modification
|
rs := &actionsv1alpha1.RunnerReplicaSet{
|
||||||
// made by the controller to update .Status.AvailableReplicas and .Status.ReadyReplicas
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
// Operation cannot be fulfilled on runnerreplicasets.actions.summerwind.dev "example-runnerreplicaset": the object has been modified; please apply your changes to the latest version and try again
|
Name: name,
|
||||||
Eventually(func() error {
|
Namespace: ns.Name,
|
||||||
var rs actionsv1alpha1.RunnerReplicaSet
|
|
||||||
|
|
||||||
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: name}, &rs)
|
|
||||||
|
|
||||||
Expect(err).NotTo(HaveOccurred(), "failed to get test RunnerReplicaSet resource")
|
|
||||||
|
|
||||||
rs.Spec.Replicas = intPtr(2)
|
|
||||||
|
|
||||||
return k8sClient.Update(ctx, &rs)
|
|
||||||
},
|
|
||||||
time.Second*1, time.Millisecond*500).Should(BeNil())
|
|
||||||
|
|
||||||
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
|
|
||||||
|
|
||||||
Eventually(
|
|
||||||
func() int {
|
|
||||||
selector, err := metav1.LabelSelectorAsSelector(
|
|
||||||
&metav1.LabelSelector{
|
|
||||||
MatchLabels: map[string]string{
|
|
||||||
"foo": "bar",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
logf.Log.Error(err, "failed to create labelselector")
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
err = k8sClient.List(
|
|
||||||
ctx,
|
|
||||||
&runners,
|
|
||||||
client.InNamespace(ns.Name),
|
|
||||||
client.MatchingLabelsSelector{Selector: selector},
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
logf.Log.Error(err, "list runners")
|
|
||||||
}
|
|
||||||
|
|
||||||
runnersList.Sync(runners.Items)
|
|
||||||
|
|
||||||
return len(runners.Items)
|
|
||||||
},
|
},
|
||||||
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(2))
|
Spec: actionsv1alpha1.RunnerReplicaSetSpec{
|
||||||
}
|
Replicas: intPtr(2),
|
||||||
|
Selector: &metav1.LabelSelector{
|
||||||
{
|
|
||||||
// We wrap the update in the Eventually block to avoid the below error that occurs due to concurrent modification
|
|
||||||
// made by the controller to update .Status.AvailableReplicas and .Status.ReadyReplicas
|
|
||||||
// Operation cannot be fulfilled on runnersets.actions.summerwind.dev "example-runnerset": the object has been modified; please apply your changes to the latest version and try again
|
|
||||||
Eventually(func() error {
|
|
||||||
var rs actionsv1alpha1.RunnerReplicaSet
|
|
||||||
|
|
||||||
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: name}, &rs)
|
|
||||||
|
|
||||||
Expect(err).NotTo(HaveOccurred(), "failed to get test RunnerReplicaSet resource")
|
|
||||||
|
|
||||||
rs.Spec.Replicas = intPtr(0)
|
|
||||||
|
|
||||||
return k8sClient.Update(ctx, &rs)
|
|
||||||
},
|
|
||||||
time.Second*1, time.Millisecond*500).Should(BeNil())
|
|
||||||
|
|
||||||
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
|
|
||||||
|
|
||||||
Eventually(
|
|
||||||
func() int {
|
|
||||||
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
|
|
||||||
MatchLabels: map[string]string{
|
MatchLabels: map[string]string{
|
||||||
"foo": "bar",
|
"foo": "bar",
|
||||||
},
|
},
|
||||||
})
|
},
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Template: actionsv1alpha1.RunnerTemplate{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
|
Labels: map[string]string{
|
||||||
logf.Log.Error(err, "list runners")
|
"foo": "bar",
|
||||||
return -1
|
},
|
||||||
}
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerSpec{
|
||||||
runnersList.Sync(runners.Items)
|
RunnerConfig: actionsv1alpha1.RunnerConfig{
|
||||||
|
Repository: "test/valid",
|
||||||
return len(runners.Items)
|
Image: "bar",
|
||||||
|
},
|
||||||
|
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
|
||||||
|
Env: []corev1.EnvVar{
|
||||||
|
{Name: "FOO", Value: "FOOVALUE"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(0))
|
}
|
||||||
|
|
||||||
|
err := k8sClient.Create(ctx, rs)
|
||||||
|
|
||||||
|
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
|
||||||
|
|
||||||
|
Eventually(
|
||||||
|
getRunnerCount,
|
||||||
|
time.Second*5, time.Second).Should(BeEquivalentTo(2))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should not create any runners when specified 0 replicas", func() {
|
||||||
|
{
|
||||||
|
rs := &actionsv1alpha1.RunnerReplicaSet{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: name,
|
||||||
|
Namespace: ns.Name,
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerReplicaSetSpec{
|
||||||
|
Replicas: intPtr(0),
|
||||||
|
Selector: &metav1.LabelSelector{
|
||||||
|
MatchLabels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Template: actionsv1alpha1.RunnerTemplate{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Labels: map[string]string{
|
||||||
|
"foo": "bar",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: actionsv1alpha1.RunnerSpec{
|
||||||
|
RunnerConfig: actionsv1alpha1.RunnerConfig{
|
||||||
|
Repository: "test/valid",
|
||||||
|
Image: "bar",
|
||||||
|
},
|
||||||
|
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
|
||||||
|
Env: []corev1.EnvVar{
|
||||||
|
{Name: "FOO", Value: "FOOVALUE"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := k8sClient.Create(ctx, rs)
|
||||||
|
|
||||||
|
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
|
||||||
|
|
||||||
|
Consistently(
|
||||||
|
getRunnerCount,
|
||||||
|
time.Second*5, time.Second).Should(BeEquivalentTo(0))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -22,8 +22,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
appsv1 "k8s.io/api/apps/v1"
|
appsv1 "k8s.io/api/apps/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/errors"
|
|
||||||
"k8s.io/apimachinery/pkg/types"
|
|
||||||
|
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
@@ -38,10 +36,6 @@ import (
|
|||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
LabelKeyRunnerSetName = "runnerset-name"
|
|
||||||
)
|
|
||||||
|
|
||||||
// RunnerSetReconciler reconciles a Runner object
|
// RunnerSetReconciler reconciles a Runner object
|
||||||
type RunnerSetReconciler struct {
|
type RunnerSetReconciler struct {
|
||||||
Name string
|
Name string
|
||||||
@@ -64,6 +58,7 @@ type RunnerSetReconciler struct {
|
|||||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnersets/status,verbs=get;update;patch
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnersets/status,verbs=get;update;patch
|
||||||
// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
|
// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
|
// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
|
||||||
|
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||||
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
|
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
|
||||||
|
|
||||||
@@ -90,6 +85,18 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
|
|
||||||
metrics.SetRunnerSet(*runnerSet)
|
metrics.SetRunnerSet(*runnerSet)
|
||||||
|
|
||||||
|
var statefulsetList appsv1.StatefulSetList
|
||||||
|
if err := r.List(ctx, &statefulsetList, client.InNamespace(req.Namespace), client.MatchingFields{runnerSetOwnerKey: req.Name}); err != nil {
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
statefulsets := statefulsetList.Items
|
||||||
|
|
||||||
|
if len(statefulsets) > 1000 {
|
||||||
|
log.Info("Postponed reconcilation to prevent potential infinite loop. If you're really scaling more than 1000 statefulsets, do change this hard-coded threshold!")
|
||||||
|
return ctrl.Result{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
desiredStatefulSet, err := r.newStatefulSet(runnerSet)
|
desiredStatefulSet, err := r.newStatefulSet(runnerSet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
r.Recorder.Event(runnerSet, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
r.Recorder.Event(runnerSet, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
|
||||||
@@ -99,107 +106,49 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
liveStatefulSet := &appsv1.StatefulSet{}
|
addedReplicas := int32(1)
|
||||||
if err := r.Get(ctx, types.NamespacedName{Namespace: runnerSet.Namespace, Name: runnerSet.Name}, liveStatefulSet); err != nil {
|
create := desiredStatefulSet.DeepCopy()
|
||||||
if !errors.IsNotFound(err) {
|
create.Spec.Replicas = &addedReplicas
|
||||||
log.Error(err, "Failed to get live statefulset")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := r.Client.Create(ctx, desiredStatefulSet); err != nil {
|
|
||||||
log.Error(err, "Failed to create statefulset resource")
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
liveTemplateHash, ok := getStatefulSetTemplateHash(liveStatefulSet)
|
|
||||||
if !ok {
|
|
||||||
log.Info("Failed to get template hash of newest statefulset resource. It must be in an invalid state. Please manually delete the statefulset so that it is recreated")
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
desiredTemplateHash, ok := getStatefulSetTemplateHash(desiredStatefulSet)
|
|
||||||
if !ok {
|
|
||||||
log.Info("Failed to get template hash of desired statefulset. It must be in an invalid state. Please manually delete the statefulset so that it is recreated")
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if liveTemplateHash != desiredTemplateHash {
|
|
||||||
copy := liveStatefulSet.DeepCopy()
|
|
||||||
copy.Spec = desiredStatefulSet.Spec
|
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, copy, client.MergeFrom(liveStatefulSet)); err != nil {
|
|
||||||
log.Error(err, "Failed to patch statefulset", "reason", errors.ReasonForError(err))
|
|
||||||
|
|
||||||
if errors.IsInvalid(err) {
|
|
||||||
// NOTE: This might not be ideal but is currently required to deal with the forbidden error by recreating the statefulset
|
|
||||||
//
|
|
||||||
// 2021-06-13T07:19:52.760Z ERROR actions-runner-controller.runnerset Failed to patch statefulset
|
|
||||||
// {"runnerset": "default/example-runnerset", "error": "StatefulSet.apps \"example-runnerset\" is invalid: s
|
|
||||||
// pec: Forbidden: updates to statefulset spec for fields other than 'replicas', 'template', and 'updateStrategy'
|
|
||||||
// are forbidden"}
|
|
||||||
//
|
|
||||||
// Even though the error message includes "Forbidden", this error's reason is "Invalid".
|
|
||||||
// That's why we're using errors.IsInvalid above.
|
|
||||||
|
|
||||||
if err := r.Client.Delete(ctx, liveStatefulSet); err != nil {
|
|
||||||
log.Error(err, "Failed to delete statefulset for force-update")
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
log.Info("Deleted statefulset for force-update")
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// We requeue in order to clean up old runner replica sets later.
|
|
||||||
// Otherwise, they aren't cleaned up until the next re-sync interval.
|
|
||||||
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
const defaultReplicas = 1
|
const defaultReplicas = 1
|
||||||
|
|
||||||
var replicasOfLiveStatefulSet *int
|
|
||||||
if liveStatefulSet.Spec.Replicas != nil {
|
|
||||||
v := int(*liveStatefulSet.Spec.Replicas)
|
|
||||||
replicasOfLiveStatefulSet = &v
|
|
||||||
}
|
|
||||||
|
|
||||||
var replicasOfDesiredStatefulSet *int
|
var replicasOfDesiredStatefulSet *int
|
||||||
if desiredStatefulSet.Spec.Replicas != nil {
|
if desiredStatefulSet.Spec.Replicas != nil {
|
||||||
v := int(*desiredStatefulSet.Spec.Replicas)
|
v := int(*desiredStatefulSet.Spec.Replicas)
|
||||||
replicasOfDesiredStatefulSet = &v
|
replicasOfDesiredStatefulSet = &v
|
||||||
}
|
}
|
||||||
|
|
||||||
currentDesiredReplicas := getIntOrDefault(replicasOfLiveStatefulSet, defaultReplicas)
|
|
||||||
newDesiredReplicas := getIntOrDefault(replicasOfDesiredStatefulSet, defaultReplicas)
|
newDesiredReplicas := getIntOrDefault(replicasOfDesiredStatefulSet, defaultReplicas)
|
||||||
|
|
||||||
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
|
effectiveTime := runnerSet.Spec.EffectiveTime
|
||||||
if currentDesiredReplicas != newDesiredReplicas {
|
ephemeral := runnerSet.Spec.Ephemeral == nil || *runnerSet.Spec.Ephemeral
|
||||||
v := int32(newDesiredReplicas)
|
|
||||||
|
|
||||||
updated := liveStatefulSet.DeepCopy()
|
var owners []client.Object
|
||||||
updated.Spec.Replicas = &v
|
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, updated, client.MergeFrom(liveStatefulSet)); err != nil {
|
for _, ss := range statefulsets {
|
||||||
log.Error(err, "Failed to update statefulset")
|
ss := ss
|
||||||
|
owners = append(owners, &ss)
|
||||||
return ctrl.Result{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctrl.Result{}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
statusReplicas := int(liveStatefulSet.Status.Replicas)
|
if res, err := syncVolumes(ctx, r.Client, log, req.Namespace, runnerSet, statefulsets); err != nil {
|
||||||
statusReadyReplicas := int(liveStatefulSet.Status.ReadyReplicas)
|
return ctrl.Result{}, err
|
||||||
totalCurrentReplicas := int(liveStatefulSet.Status.CurrentReplicas)
|
} else if res != nil {
|
||||||
updatedReplicas := int(liveStatefulSet.Status.UpdatedReplicas)
|
return *res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, newDesiredReplicas, func() client.Object { return create.DeepCopy() }, ephemeral, owners)
|
||||||
|
if err != nil || res == nil {
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var statusReplicas, statusReadyReplicas, totalCurrentReplicas, updatedReplicas int
|
||||||
|
|
||||||
|
for _, ss := range res.currentObjects {
|
||||||
|
statusReplicas += int(ss.statefulSet.Status.Replicas)
|
||||||
|
statusReadyReplicas += int(ss.statefulSet.Status.ReadyReplicas)
|
||||||
|
totalCurrentReplicas += int(ss.statefulSet.Status.CurrentReplicas)
|
||||||
|
updatedReplicas += int(ss.statefulSet.Status.UpdatedReplicas)
|
||||||
|
}
|
||||||
|
|
||||||
status := runnerSet.Status.DeepCopy()
|
status := runnerSet.Status.DeepCopy()
|
||||||
|
|
||||||
@@ -224,12 +173,6 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getStatefulSetTemplateHash(rs *appsv1.StatefulSet) (string, bool) {
|
|
||||||
hash, ok := rs.Labels[LabelKeyRunnerTemplateHash]
|
|
||||||
|
|
||||||
return hash, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func getRunnerSetSelector(runnerSet *v1alpha1.RunnerSet) *metav1.LabelSelector {
|
func getRunnerSetSelector(runnerSet *v1alpha1.RunnerSet) *metav1.LabelSelector {
|
||||||
selector := runnerSet.Spec.Selector
|
selector := runnerSet.Spec.Selector
|
||||||
if selector == nil {
|
if selector == nil {
|
||||||
@@ -245,21 +188,14 @@ var LabelValuePodMutation = "true"
|
|||||||
func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*appsv1.StatefulSet, error) {
|
func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*appsv1.StatefulSet, error) {
|
||||||
runnerSetWithOverrides := *runnerSet.Spec.DeepCopy()
|
runnerSetWithOverrides := *runnerSet.Spec.DeepCopy()
|
||||||
|
|
||||||
for _, l := range r.CommonRunnerLabels {
|
runnerSetWithOverrides.Labels = append(runnerSetWithOverrides.Labels, r.CommonRunnerLabels...)
|
||||||
runnerSetWithOverrides.Labels = append(runnerSetWithOverrides.Labels, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
// This label selector is used by default when rd.Spec.Selector is empty.
|
|
||||||
runnerSetWithOverrides.Template.ObjectMeta.Labels = CloneAndAddLabel(runnerSetWithOverrides.Template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)
|
|
||||||
|
|
||||||
runnerSetWithOverrides.Template.ObjectMeta.Labels = CloneAndAddLabel(runnerSetWithOverrides.Template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
|
|
||||||
|
|
||||||
template := corev1.Pod{
|
template := corev1.Pod{
|
||||||
ObjectMeta: runnerSetWithOverrides.StatefulSetSpec.Template.ObjectMeta,
|
ObjectMeta: runnerSetWithOverrides.StatefulSetSpec.Template.ObjectMeta,
|
||||||
Spec: runnerSetWithOverrides.StatefulSetSpec.Template.Spec,
|
Spec: runnerSetWithOverrides.StatefulSetSpec.Template.Spec,
|
||||||
}
|
}
|
||||||
|
|
||||||
pod, err := newRunnerPod(template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL, false)
|
pod, err := newRunnerPod(runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -288,9 +224,12 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
|
|||||||
rs := appsv1.StatefulSet{
|
rs := appsv1.StatefulSet{
|
||||||
TypeMeta: metav1.TypeMeta{},
|
TypeMeta: metav1.TypeMeta{},
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: runnerSet.ObjectMeta.Name,
|
GenerateName: runnerSet.ObjectMeta.Name + "-",
|
||||||
Namespace: runnerSet.ObjectMeta.Namespace,
|
Namespace: runnerSet.ObjectMeta.Namespace,
|
||||||
Labels: CloneAndAddLabel(runnerSet.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash),
|
Labels: CloneAndAddLabel(runnerSet.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
SyncTimeAnnotationKey: time.Now().Format(time.RFC3339),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Spec: runnerSetWithOverrides.StatefulSetSpec,
|
Spec: runnerSetWithOverrides.StatefulSetSpec,
|
||||||
}
|
}
|
||||||
@@ -310,6 +249,22 @@ func (r *RunnerSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|||||||
|
|
||||||
r.Recorder = mgr.GetEventRecorderFor(name)
|
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||||
|
|
||||||
|
if err := mgr.GetFieldIndexer().IndexField(context.TODO(), &appsv1.StatefulSet{}, runnerSetOwnerKey, func(rawObj client.Object) []string {
|
||||||
|
set := rawObj.(*appsv1.StatefulSet)
|
||||||
|
owner := metav1.GetControllerOf(set)
|
||||||
|
if owner == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if owner.APIVersion != v1alpha1.GroupVersion.String() || owner.Kind != "RunnerSet" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return []string{owner.Name}
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return ctrl.NewControllerManagedBy(mgr).
|
return ctrl.NewControllerManagedBy(mgr).
|
||||||
For(&v1alpha1.RunnerSet{}).
|
For(&v1alpha1.RunnerSet{}).
|
||||||
Owns(&appsv1.StatefulSet{}).
|
Owns(&appsv1.StatefulSet{}).
|
||||||
|
|||||||
@@ -605,3 +605,13 @@ func parseAndMatchRecurringPeriod(now time.Time, start, end, frequency, until st
|
|||||||
|
|
||||||
return MatchSchedule(now, startTime, endTime, RecurrenceRule{Frequency: frequency, UntilTime: untilTime})
|
return MatchSchedule(now, startTime, endTime, RecurrenceRule{Frequency: frequency, UntilTime: untilTime})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func FuzzMatchSchedule(f *testing.F) {
|
||||||
|
start := time.Now()
|
||||||
|
end := time.Now()
|
||||||
|
now := time.Now()
|
||||||
|
f.Fuzz(func(t *testing.T, freq string) {
|
||||||
|
// Verify that it never panics
|
||||||
|
_, _, _ = MatchSchedule(now, start, end, RecurrenceRule{Frequency: freq})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
181
controllers/sync_volumes.go
Normal file
181
controllers/sync_volumes.go
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
|
"github.com/go-logr/logr"
|
||||||
|
appsv1 "k8s.io/api/apps/v1"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
labelKeyCleanup = "pending-cleanup"
|
||||||
|
labelKeyRunnerStatefulSetName = "runner-statefulset-name"
|
||||||
|
)
|
||||||
|
|
||||||
|
func syncVolumes(ctx context.Context, c client.Client, log logr.Logger, ns string, runnerSet *v1alpha1.RunnerSet, statefulsets []appsv1.StatefulSet) (*ctrl.Result, error) {
|
||||||
|
log = log.WithValues("ns", ns)
|
||||||
|
|
||||||
|
for _, t := range runnerSet.Spec.StatefulSetSpec.VolumeClaimTemplates {
|
||||||
|
for _, sts := range statefulsets {
|
||||||
|
pvcName := fmt.Sprintf("%s-%s-0", t.Name, sts.Name)
|
||||||
|
|
||||||
|
var pvc corev1.PersistentVolumeClaim
|
||||||
|
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: pvcName}, &pvc); err != nil {
|
||||||
|
if !kerrors.IsNotFound(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO move this to statefulset reconciler so that we spam this less,
|
||||||
|
// by starting the loop only after the statefulset got deletionTimestamp set.
|
||||||
|
// Perhaps you can just wrap this in a finalizer here.
|
||||||
|
if pvc.Labels[labelKeyRunnerStatefulSetName] == "" {
|
||||||
|
updated := pvc.DeepCopy()
|
||||||
|
updated.Labels[labelKeyRunnerStatefulSetName] = sts.Name
|
||||||
|
if err := c.Update(ctx, updated); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
log.V(1).Info("Added runner-statefulset-name label to PVC", "sts", sts.Name, "pvc", pvcName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// PVs are not namespaced hence we don't need client.InNamespace(ns).
|
||||||
|
// If we added that, c.List will silently return zero items.
|
||||||
|
//
|
||||||
|
// This `List` needs to be done in a dedicated reconciler that is registered to the manager via the `For` func.
|
||||||
|
// Otherwise the List func might return outdated contents(I saw status.phase being Bound even after K8s updated it to Released, and it lasted minutes).
|
||||||
|
//
|
||||||
|
// cleanupLabels := map[string]string{
|
||||||
|
// labelKeyCleanup: runnerSet.Name,
|
||||||
|
// }
|
||||||
|
// pvList := &corev1.PersistentVolumeList{}
|
||||||
|
// if err := c.List(ctx, pvList, client.MatchingLabels(cleanupLabels)); err != nil {
|
||||||
|
// log.Info("retrying pv listing", "ns", ns, "err", err)
|
||||||
|
// return nil, err
|
||||||
|
// }
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func syncPVC(ctx context.Context, c client.Client, log logr.Logger, ns string, pvc *corev1.PersistentVolumeClaim) (*ctrl.Result, error) {
|
||||||
|
stsName := pvc.Labels[labelKeyRunnerStatefulSetName]
|
||||||
|
if stsName == "" {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Reconciling runner PVC")
|
||||||
|
|
||||||
|
var sts appsv1.StatefulSet
|
||||||
|
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: stsName}, &sts); err != nil {
|
||||||
|
if !kerrors.IsNotFound(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// We assume that the statefulset is shortly terminated, hence retry forever until it gets removed.
|
||||||
|
retry := 10 * time.Second
|
||||||
|
log.V(1).Info("Retrying sync until statefulset gets removed", "requeueAfter", retry)
|
||||||
|
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log = log.WithValues("sts", stsName)
|
||||||
|
|
||||||
|
pvName := pvc.Spec.VolumeName
|
||||||
|
|
||||||
|
if pvName != "" {
|
||||||
|
// If we deleted PVC before unsetting pv.spec.claimRef,
|
||||||
|
// K8s seems to revive the claimRef :thinking:
|
||||||
|
// So we need to mark PV for claimRef unset first, and delete PVC, and finally unset claimRef on PV.
|
||||||
|
|
||||||
|
var pv corev1.PersistentVolume
|
||||||
|
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: pvName}, &pv); err != nil {
|
||||||
|
if !kerrors.IsNotFound(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
pvCopy := pv.DeepCopy()
|
||||||
|
if pvCopy.Labels == nil {
|
||||||
|
pvCopy.Labels = map[string]string{}
|
||||||
|
}
|
||||||
|
pvCopy.Labels[labelKeyCleanup] = stsName
|
||||||
|
|
||||||
|
log.V(2).Info("Scheduling to unset PV's claimRef", "pv", pv.Name)
|
||||||
|
|
||||||
|
// Apparently K8s doesn't reconcile PV immediately after PVC deletion.
|
||||||
|
// So we start a relatively busy loop of PV reconcilation slightly before the PVC deletion,
|
||||||
|
// so that PV can be unbound as soon as possible after the PVC got deleted.
|
||||||
|
if err := c.Update(ctx, pvCopy); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("Updated PV to unset claimRef")
|
||||||
|
|
||||||
|
// At this point, the PV is still Bound
|
||||||
|
|
||||||
|
log.V(2).Info("Deleting unused PVC")
|
||||||
|
|
||||||
|
if err := c.Delete(ctx, pvc); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("Deleted unused PVC")
|
||||||
|
|
||||||
|
// At this point, the PV is still "Bound", but we are ready to unset pv.spec.claimRef in pv controller.
|
||||||
|
// Once the pv controller unsets claimRef, the PV becomes "Released", hence available for reuse by another eligible PVC.
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func syncPV(ctx context.Context, c client.Client, log logr.Logger, ns string, pv *corev1.PersistentVolume) (*ctrl.Result, error) {
|
||||||
|
if pv.Spec.ClaimRef == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Reconciling PV")
|
||||||
|
|
||||||
|
if pv.Labels[labelKeyCleanup] == "" {
|
||||||
|
// We assume that the pvc is shortly terminated, hence retry forever until it gets removed.
|
||||||
|
retry := 10 * time.Second
|
||||||
|
log.V(1).Info("Retrying sync until pvc gets removed", "requeueAfter", retry)
|
||||||
|
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("checking pv phase", "phase", pv.Status.Phase)
|
||||||
|
|
||||||
|
if pv.Status.Phase != corev1.VolumeReleased {
|
||||||
|
// We assume that the pvc is shortly terminated, hence retry forever until it gets removed.
|
||||||
|
retry := 10 * time.Second
|
||||||
|
log.V(1).Info("Retrying sync until pvc gets released", "requeueAfter", retry)
|
||||||
|
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// At this point, the PV is still Released
|
||||||
|
|
||||||
|
pvCopy := pv.DeepCopy()
|
||||||
|
delete(pvCopy.Labels, labelKeyCleanup)
|
||||||
|
pvCopy.Spec.ClaimRef = nil
|
||||||
|
log.V(2).Info("Unsetting PV's claimRef", "pv", pv.Name)
|
||||||
|
if err := c.Update(ctx, pvCopy); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("PV should be Available now")
|
||||||
|
|
||||||
|
// At this point, the PV becomes Available, if it's reclaim policy is "Retain".
|
||||||
|
// I have not yet tested it with "Delete" but perhaps it's deleted automatically after the update?
|
||||||
|
// https://kubernetes.io/docs/concepts/storage/persistent-volumes/#retain
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
74
docs/releasenotes/0.22.md
Normal file
74
docs/releasenotes/0.22.md
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# actions-runner-controller v0.22.0
|
||||||
|
|
||||||
|
This version of ARC focuses on scalability and reliability of runners.
|
||||||
|
|
||||||
|
## GitHub API Cache
|
||||||
|
|
||||||
|
In terms of scalability, ARC now caches GitHub API responses according to their recommendation(=Cache-Control header[^1]).
|
||||||
|
As long as GitHub keeps its current behavior, it will result in ARC caching various List Runners API and List Workflow Jobs calls for 60 seconds.
|
||||||
|
|
||||||
|
[^1]: https://docs.github.com/en/rest/overview/resources-in-the-rest-api#conditional-requests
|
||||||
|
|
||||||
|
The cache for the List Runners API is especially important, as their responses can be shared between every runner under the same scope (repository, organization, or enterprise).
|
||||||
|
|
||||||
|
In previous versions of ARC, the number of List Runners API calls had scaled proportional to the number of runners managed by ARC.
|
||||||
|
Thanks to the addition of cache, since v0.22.0, it may scale proportional to the number of runner scopes (=The number of repositories for your repository runners + The number of organizations for your organizational runners + The number of enterprises for your enterprise runners). You might be able to scale to hundreds of runners depending on your environment.
|
||||||
|
|
||||||
|
Please share your experience if you successfully scaled to a level that wasn't possible with previous versions!
|
||||||
|
|
||||||
|
## Improved Runner Scale Down Process
|
||||||
|
|
||||||
|
In terms of reliability, the first thing to note is that it has a new scale down process for both RunnerDeployment and RunnerSet.
|
||||||
|
|
||||||
|
Previously every runner pod can restart immediately after the completion, while at the same time ARC might mark the same runner pod for deletion due to scale down.
|
||||||
|
That resulted in various race conditions that terminated the runner prematurely while running a workflow job[^2].
|
||||||
|
|
||||||
|
[^2]: See [this issue](https://github.com/actions-runner-controller/actions-runner-controller/issues/911) for more context.
|
||||||
|
|
||||||
|
And it's now fixed. The new scale down process ensures that the runner has been registered successfully and then de-registered from GitHub Actions, before starting the runner pod deletion process.
|
||||||
|
A runner pod can no longer be terminated while restarting or running a job, which makes it impossible to be in the middle of running a workflow job when a runner pod is being terminated. No more race conditions.
|
||||||
|
|
||||||
|
## Optimized Ephemeral Runner Termination Makes Less "Remove Runner" API calls
|
||||||
|
|
||||||
|
It is also worth mentioning that the new scale down process makes fewer GitHub Actions `RemoveRunner` API calls, which contributes to more scalability.
|
||||||
|
|
||||||
|
Two enhancements had been made on that.
|
||||||
|
|
||||||
|
First, every runner managed by ARC now [uses `--ephemeral` by default](https://github.com/actions-runner-controller/actions-runner-controller/pull/1211).
|
||||||
|
|
||||||
|
Second, we [removed unnecessary `RemoveRunner` API calls](https://github.com/actions-runner-controller/actions-runner-controller/pull/1204) when it's an ephemeral runner that has already completed running.
|
||||||
|
|
||||||
|
[GitHub designed ephemeral runners to be automatically unregistered from GitHub Actions after running their first workflow jobs](https://github.blog/changelog/2021-09-20-github-actions-ephemeral-self-hosted-runners-new-webhooks-for-auto-scaling). It is unnecessary to call the `RemoveRunner` API when the ephemeral runner pod has already completed successfully. These two enhancements align with that fact, resulting in ARC making fewer API calls.
|
||||||
|
|
||||||
|
## Prevention of Unnecessary Runner Pod Recreations
|
||||||
|
|
||||||
|
Another reliability enhancement is based on the addition of a new field, `EffectiveTime`, to our RunnerDeployment and RunnerSet specifications.
|
||||||
|
|
||||||
|
The field comes in play only for ephemeral runners, and ARC uses it as an indicator of when to add more runner pods, to match the current number of runner pods to the desired number.
|
||||||
|
|
||||||
|
How that improves the reliability?
|
||||||
|
|
||||||
|
Previously, ARC had been continuously recreating runner pods as they complete, with no delay. That sometimes resulted in a runner pod to get recreated and then immediately terminated without being used at all. Not only this is a waste of cluster resource, it resulted in race conditions we explained in the previous section about "Improved Runner Scale Down Process". We fixed the race conditions as explained in the previous section, but the waste of cluster resource was still problematic.
|
||||||
|
|
||||||
|
With `EffectiveTime`, ARC defers the addition(and recreations, as ARC doesn't distinguish addition vs recreation) of
|
||||||
|
missing runner pods until the `EffectiveTime` is updated. `EffectiveTime` is updated only when the github-webhook-server of ARC updates the desired replicas number, ARC adds/recreates runner pods only after the webhook server updates it, the issue is resolved.
|
||||||
|
|
||||||
|
This can be an unnecessary detail, but anyway- the "defer" mechanism times out after the `DefaultRunnerPodRecreationDelayAfterWebhookScale` duration, which is currently hard-coded to 10 minutes. So in case ARC missed receiving a webhook event for proper scaling, it converges to the desired replicas after 10 minutes anyway, so that the current state eventually syncs up with the desired state.
|
||||||
|
|
||||||
|
Note that `EffectiveTime` fields are set by HRA controller for any RunnerDeployment and RunnerSet that manages ephemeral runners. That means, it is enabled regardless of the type of autoscaler you're using, webhook or API polling based ones. It isn't enabled for static(persistent) runners.
|
||||||
|
|
||||||
|
There's currently no way to opt-out of `EffectiveTime` because the author of the feature(@mumoshu) thought it's unneeded. Please open a GitHub issue with details on your use-case if you do need to opt-out.
|
||||||
|
|
||||||
|
## Generalized Runner Pod Management Logic
|
||||||
|
|
||||||
|
This one might not be a user-visible change, but I'm explaining it for anyone who may wonder.
|
||||||
|
|
||||||
|
Since this version, ARC uses the same logic for `RunnerDeployment` and `RunnerSet`. `RunnerDeployment` is Pod-based and `RunnerSet` is StatefulSet-based. That remains unchanged. But most of the logic about how runner pods are managed is shared between the two.
|
||||||
|
|
||||||
|
The only difference is what adapters those variants pass to the generalized logic. `RunnerDeployment` uses `RunnerReplicaSet`(our another Kubernetes custom resource that powers `RunnerDeployment`) as an owner of a runner pod, and `RunnerSet` uses `StatefulSet`(it's vanilla Kubernetes StatefulSet) as an owner of a runner pod.
|
||||||
|
|
||||||
|
This refactoring turned out to enable us to make `RunnerSet` as reliable as `RunnerDeployment`. `RunnerSet` has been considered an experimental feature
|
||||||
|
even though it is more customizable than `RunnerDeployment` and has a support for Persistent Volume Claim(PVC)s.
|
||||||
|
But since it now uses the same logic under the hood, `RunnerSet` can be considered more production-ready than before.
|
||||||
|
|
||||||
|
If you stayed away from using `RunnerSet` due to that, please try it and report anything you experienced!
|
||||||
89
docs/releasenotes/0.23.md
Normal file
89
docs/releasenotes/0.23.md
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# actions-runner-controller v0.23.0
|
||||||
|
|
||||||
|
All changes in this release can be found in the milestone https://github.com/actions-runner-controller/actions-runner-controller/milestone/3
|
||||||
|
|
||||||
|
This log documents breaking changes and major enhancements.
|
||||||
|
## BREAKING CHANGE : Workflow job webhooks require an explicit field set
|
||||||
|
|
||||||
|
Previously the webhook event workflow job was set as the default if no `githubEvent` was set.
|
||||||
|
|
||||||
|
**Migration Steps**
|
||||||
|
|
||||||
|
Change this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
scaleUpTriggers:
|
||||||
|
- githubEvent: {}
|
||||||
|
duration: "30m"
|
||||||
|
```
|
||||||
|
|
||||||
|
To this:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
scaleUpTriggers:
|
||||||
|
- githubEvent:
|
||||||
|
workflowJob: {}
|
||||||
|
duration: "30m"
|
||||||
|
```
|
||||||
|
|
||||||
|
## BREAKING CHANGE : topologySpreadConstraint renamed to topologySpreadConstraints
|
||||||
|
|
||||||
|
Previously, to use the pod `topologySpreadConstraints:` attribute in your runners you had to set `topologySpreadConstraint:` instead; this was a typo and has been corrected.
|
||||||
|
|
||||||
|
**Migration Steps**
|
||||||
|
|
||||||
|
Update your runners to use `topologySpreadConstraints:` instead
|
||||||
|
|
||||||
|
## BREAKING CHANGE : Default sync period is now 1 minute instead of 10 minutes
|
||||||
|
|
||||||
|
Since caching has been implemented, the default sync period of 10 minutes is unnecessarily conservative and gives a poor out of the box user experience. If you need a 10 minute sync period ensure you explicitly set this value.
|
||||||
|
|
||||||
|
**Migration Steps**
|
||||||
|
|
||||||
|
Update your sync period, how this is done will depend on how you've deployed ARC.
|
||||||
|
|
||||||
|
## BREAKING CHANGE : A metric is set by default
|
||||||
|
|
||||||
|
Previously, if no metric was provided and you were using pull-based scaling, the `TotalNumberOfQueuedAndInProgressWorkflowRuns` metric was applied. No default is set now.
|
||||||
|
|
||||||
|
**Migration Steps**
|
||||||
|
|
||||||
|
Add in the `TotalNumberOfQueuedAndInProgressWorkflowRuns` metric where you are currently relying on it
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: RunnerDeployment
|
||||||
|
metadata:
|
||||||
|
name: example-runner-deployment
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
organisation: my-awesome-organisation
|
||||||
|
labels:
|
||||||
|
- my-awesome-runner
|
||||||
|
---
|
||||||
|
apiVersion: actions.summerwind.dev/v1alpha1
|
||||||
|
kind: HorizontalRunnerAutoscaler
|
||||||
|
metadata:
|
||||||
|
name: example-runner-deployment-autoscaler
|
||||||
|
spec:
|
||||||
|
scaleTargetRef:
|
||||||
|
name: example-runner-deployment
|
||||||
|
minReplicas: 1
|
||||||
|
maxReplicas: 5
|
||||||
|
metrics:
|
||||||
|
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||||
|
repositoryNames:
|
||||||
|
- owner/my-awesome-repo-1
|
||||||
|
- owner/my-awesome-repo-2
|
||||||
|
- owner/my-awesome-repo-3
|
||||||
|
```
|
||||||
|
|
||||||
|
## ENHANCEMENT : Find runner groups that visible to repository using a single API call
|
||||||
|
|
||||||
|
GitHub has contributed code to utilise a new API to enable us to get a repository's runner groups with a single API call. This enables us to scale runners based on the requesting repository's runner group membership without a series of expensive API queries.
|
||||||
|
|
||||||
|
This is an opt-in feature currently as it's a significant change in behaviour if enabled. Additionally, whilst scaling based on the repository's runner group membership is supported in both GHES and github.com, only github.com currently has access to the new rate-limit budget friendly API.
|
||||||
|
|
||||||
|
To enable this, deploy via Helm and set `githubWebhookServer.useRunnerGroupsVisibility` to `true`.
|
||||||
54
docs/releasenotes/0.24.md
Normal file
54
docs/releasenotes/0.24.md
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# actions-runner-controller v0.24.0
|
||||||
|
|
||||||
|
All changes in this release can be found in the milestone https://github.com/actions-runner-controller/actions-runner-controller/milestone/4
|
||||||
|
|
||||||
|
This log documents breaking and major enhancements
|
||||||
|
|
||||||
|
## Upgrading
|
||||||
|
|
||||||
|
In case you're using our Helm chart to deploy ARC, use the chart 0.19.0 or greater. Don't miss upgrading CRDs as usual! Helm doesn't upgrade CRDs.
|
||||||
|
|
||||||
|
## BREAKING CHANGE : Support for `--once` is being dropped
|
||||||
|
|
||||||
|
> **Warning**: If you're using ARC's official runner image, make sure to update the image tag to `v2.292.0` BEFORE upgrading ARC
|
||||||
|
|
||||||
|
In #1385 we changed ARC to NOT automatically set the feature flag `RUNNER_FEATURE_FLAG_EPHEMERAL=true`. If you're using ARC's official runner image, make sure to update the image tag to `v2.292.0` before upgrading ARC, because that's the first runner image release since we changed the default to `--ephemeral`. If you kept using an older runner image after upgrading ARC, you would end up using `--once`, which is unreliable and had been deprecated since almost a year ago.
|
||||||
|
|
||||||
|
>> **Warning**: If you're using a custom runner image, incorporate changes made in #1384 to your runner image dockerfile
|
||||||
|
|
||||||
|
If you're building a custom runner image on your own and it still requires the user to specify `RUNNER_FEATURE_FLAG_EPHEMERAL=true` to use `--ephemeral`, check #1384 and update your custom runner image dockerfile accordingly. Otherwise, you may unexpectedly end up with using `--once` after upgrading ARC, because that was the previous default.
|
||||||
|
|
||||||
|
Relevant PR(s): #1384, #1385
|
||||||
|
|
||||||
|
## FIX : Prevent runner from getting stuck in Terminating when the container disappeared
|
||||||
|
|
||||||
|
We occasionally heard about runner pods stuck in Terminating after the node and the containers running on it disappeared due to, for example, the machine being terminated prematurely.
|
||||||
|
|
||||||
|
We now set runner pods' restartPolicy to `Never` and remove runner pods stuck in `Waiting` after restarting, so that the pods are more likely to NOT get stuck forever.
|
||||||
|
|
||||||
|
Relevant PR(s): #1395, #1420
|
||||||
|
|
||||||
|
## ENHANCEMENT : Support arbitrarily setting `privileged: true` for runner container
|
||||||
|
|
||||||
|
This is a frequently asked feature that allows you to force `privileged: true` in case you don't need docker but still need privileged tasks to be run in a job step.
|
||||||
|
|
||||||
|
In combination with a container runtime like `sysbox` this should enable you to run docker builds within the dind sidecar, all without privileges. See [the discussion related to Sysbox](https://github.com/actions-runner-controller/actions-runner-controller/discussions/977) for more information.
|
||||||
|
|
||||||
|
Note that we ARC maintainers still have no bandwidth to provide a complete description on how to make ARC work with `sysbox` yet, but almost certainly we'd welcome contributions to the documentation if you managed to make it work.
|
||||||
|
|
||||||
|
Relevant PR(s): #1383
|
||||||
|
|
||||||
|
## ENHANCEMENT : RunnerSet can now retain PVs across restarts
|
||||||
|
|
||||||
|
This enhancement makes it more practical to use RunnerSet in combination with `volumeClaimTemplates` to make your workflow jobs faster.
|
||||||
|
|
||||||
|
Please see our updated ["Custom Volume Mounts" section in the documentation](https://github.com/actions-runner-controller/actions-runner-controller#custom-volume-mounts) for more information. Currently, we cover caching Docker image layers, go mod/build, and a PV-backed runner work directory (although this one is backed by another feature unrelated to this enhancement under the hood).
|
||||||
|
|
||||||
|
Relevant PR(s): #1340
|
||||||
|
|
||||||
|
## ENHANCEMENT : OpenSSF scorecard adoption
|
||||||
|
|
||||||
|
We assessed the project's security by following OpenSSF scorecard checks and adopting OpenSSF best practices.
|
||||||
|
It should help you judge the security throughout ARC's development and release processes.
|
||||||
|
|
||||||
|
Relevant PR(s): #1461
|
||||||
@@ -162,6 +162,10 @@ func NewServer(opts ...Option) *httptest.Server {
|
|||||||
},
|
},
|
||||||
|
|
||||||
// For RemoveRunner
|
// For RemoveRunner
|
||||||
|
"/repos/test/valid/actions/runners/0": &Handler{
|
||||||
|
Status: http.StatusNoContent,
|
||||||
|
Body: "",
|
||||||
|
},
|
||||||
"/repos/test/valid/actions/runners/1": &Handler{
|
"/repos/test/valid/actions/runners/1": &Handler{
|
||||||
Status: http.StatusNoContent,
|
Status: http.StatusNoContent,
|
||||||
Body: "",
|
Body: "",
|
||||||
|
|||||||
191
github/github.go
191
github/github.go
@@ -11,8 +11,11 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/github/metrics"
|
"github.com/actions-runner-controller/actions-runner-controller/github/metrics"
|
||||||
"github.com/bradleyfalzon/ghinstallation"
|
"github.com/actions-runner-controller/actions-runner-controller/logging"
|
||||||
|
"github.com/bradleyfalzon/ghinstallation/v2"
|
||||||
|
"github.com/go-logr/logr"
|
||||||
"github.com/google/go-github/v39/github"
|
"github.com/google/go-github/v39/github"
|
||||||
|
"github.com/gregjones/httpcache"
|
||||||
"golang.org/x/oauth2"
|
"golang.org/x/oauth2"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -28,6 +31,8 @@ type Config struct {
|
|||||||
BasicauthUsername string `split_words:"true"`
|
BasicauthUsername string `split_words:"true"`
|
||||||
BasicauthPassword string `split_words:"true"`
|
BasicauthPassword string `split_words:"true"`
|
||||||
RunnerGitHubURL string `split_words:"true"`
|
RunnerGitHubURL string `split_words:"true"`
|
||||||
|
|
||||||
|
Log *logr.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
// Client wraps GitHub client with some additional
|
// Client wraps GitHub client with some additional
|
||||||
@@ -46,7 +51,6 @@ type BasicAuthTransport struct {
|
|||||||
|
|
||||||
func (p BasicAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
func (p BasicAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||||
req.SetBasicAuth(p.Username, p.Password)
|
req.SetBasicAuth(p.Username, p.Password)
|
||||||
req.Header.Set("User-Agent", "actions-runner-controller")
|
|
||||||
return http.DefaultTransport.RoundTrip(req)
|
return http.DefaultTransport.RoundTrip(req)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,8 +86,11 @@ func (c *Config) NewClient() (*Client, error) {
|
|||||||
transport = tr
|
transport = tr
|
||||||
}
|
}
|
||||||
|
|
||||||
transport = metrics.Transport{Transport: transport}
|
cached := httpcache.NewTransport(httpcache.NewMemoryCache())
|
||||||
httpClient := &http.Client{Transport: transport}
|
cached.Transport = transport
|
||||||
|
loggingTransport := logging.Transport{Transport: cached, Log: c.Log}
|
||||||
|
metricsTransport := metrics.Transport{Transport: loggingTransport}
|
||||||
|
httpClient := &http.Client{Transport: metricsTransport}
|
||||||
|
|
||||||
var client *github.Client
|
var client *github.Client
|
||||||
var githubBaseURL string
|
var githubBaseURL string
|
||||||
@@ -128,6 +135,8 @@ func (c *Config) NewClient() (*Client, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
client.UserAgent = "actions-runner-controller"
|
||||||
|
|
||||||
return &Client{
|
return &Client{
|
||||||
Client: client,
|
Client: client,
|
||||||
regTokens: map[string]*github.RegistrationToken{},
|
regTokens: map[string]*github.RegistrationToken{},
|
||||||
@@ -144,8 +153,18 @@ func (c *Client) GetRegistrationToken(ctx context.Context, enterprise, org, repo
|
|||||||
key := getRegistrationKey(org, repo, enterprise)
|
key := getRegistrationKey(org, repo, enterprise)
|
||||||
rt, ok := c.regTokens[key]
|
rt, ok := c.regTokens[key]
|
||||||
|
|
||||||
// we like to give runners a chance that are just starting up and may miss the expiration date by a bit
|
// We'd like to allow the runner just starting up to miss the expiration date by a bit.
|
||||||
runnerStartupTimeout := 3 * time.Minute
|
// Note that this means that we're going to cache Creation Registraion Token API response longer than the
|
||||||
|
// recommended cache duration.
|
||||||
|
//
|
||||||
|
// https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository
|
||||||
|
// https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-an-organization
|
||||||
|
// https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-an-enterprise
|
||||||
|
// https://docs.github.com/en/rest/overview/resources-in-the-rest-api#conditional-requests
|
||||||
|
//
|
||||||
|
// This is currently set to 30 minutes as the result of the discussion took place at the following issue:
|
||||||
|
// https://github.com/actions-runner-controller/actions-runner-controller/issues/1295
|
||||||
|
runnerStartupTimeout := 30 * time.Minute
|
||||||
|
|
||||||
if ok && rt.GetExpiresAt().After(time.Now().Add(runnerStartupTimeout)) {
|
if ok && rt.GetExpiresAt().After(time.Now().Add(runnerStartupTimeout)) {
|
||||||
return rt, nil
|
return rt, nil
|
||||||
@@ -224,74 +243,9 @@ func (c *Client) ListRunners(ctx context.Context, enterprise, org, repo string)
|
|||||||
return runners, nil
|
return runners, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) GetRunnerGroupsFromRepository(ctx context.Context, org, repo string, potentialEnterpriseGroups []string, potentialOrgGroups []string) ([]string, []string, error) {
|
// ListOrganizationRunnerGroups returns all the runner groups defined in the organization and
|
||||||
|
// inherited to the organization from an enterprise.
|
||||||
var enterpriseRunnerGroups []string
|
func (c *Client) ListOrganizationRunnerGroups(ctx context.Context, org string) ([]*github.RunnerGroup, error) {
|
||||||
var orgRunnerGroups []string
|
|
||||||
|
|
||||||
if org != "" {
|
|
||||||
runnerGroups, err := c.getOrganizationRunnerGroups(ctx, org, repo)
|
|
||||||
if err != nil {
|
|
||||||
return enterpriseRunnerGroups, orgRunnerGroups, err
|
|
||||||
}
|
|
||||||
for _, runnerGroup := range runnerGroups {
|
|
||||||
if runnerGroup.GetInherited() { // enterprise runner groups
|
|
||||||
if !containsString(potentialEnterpriseGroups, runnerGroup.GetName()) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if runnerGroup.GetVisibility() == "all" {
|
|
||||||
enterpriseRunnerGroups = append(enterpriseRunnerGroups, runnerGroup.GetName())
|
|
||||||
} else {
|
|
||||||
hasAccess, err := c.hasRepoAccessToOrganizationRunnerGroup(ctx, org, runnerGroup.GetID(), repo)
|
|
||||||
if err != nil {
|
|
||||||
return enterpriseRunnerGroups, orgRunnerGroups, err
|
|
||||||
}
|
|
||||||
if hasAccess {
|
|
||||||
enterpriseRunnerGroups = append(enterpriseRunnerGroups, runnerGroup.GetName())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // organization runner groups
|
|
||||||
if !containsString(potentialOrgGroups, runnerGroup.GetName()) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if runnerGroup.GetVisibility() == "all" {
|
|
||||||
orgRunnerGroups = append(orgRunnerGroups, runnerGroup.GetName())
|
|
||||||
} else {
|
|
||||||
hasAccess, err := c.hasRepoAccessToOrganizationRunnerGroup(ctx, org, runnerGroup.GetID(), repo)
|
|
||||||
if err != nil {
|
|
||||||
return enterpriseRunnerGroups, orgRunnerGroups, err
|
|
||||||
}
|
|
||||||
if hasAccess {
|
|
||||||
orgRunnerGroups = append(orgRunnerGroups, runnerGroup.GetName())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return enterpriseRunnerGroups, orgRunnerGroups, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) hasRepoAccessToOrganizationRunnerGroup(ctx context.Context, org string, runnerGroupId int64, repo string) (bool, error) {
|
|
||||||
opts := github.ListOptions{PerPage: 100}
|
|
||||||
for {
|
|
||||||
list, res, err := c.Client.Actions.ListRepositoryAccessRunnerGroup(ctx, org, runnerGroupId, &opts)
|
|
||||||
if err != nil {
|
|
||||||
return false, fmt.Errorf("failed to list repository access for runner group: %w", err)
|
|
||||||
}
|
|
||||||
for _, githubRepo := range list.Repositories {
|
|
||||||
if githubRepo.GetFullName() == repo {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if res.NextPage == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
opts.Page = res.NextPage
|
|
||||||
}
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) getOrganizationRunnerGroups(ctx context.Context, org, repo string) ([]*github.RunnerGroup, error) {
|
|
||||||
var runnerGroups []*github.RunnerGroup
|
var runnerGroups []*github.RunnerGroup
|
||||||
|
|
||||||
opts := github.ListOptions{PerPage: 100}
|
opts := github.ListOptions{PerPage: 100}
|
||||||
@@ -311,6 +265,86 @@ func (c *Client) getOrganizationRunnerGroups(ctx context.Context, org, repo stri
|
|||||||
return runnerGroups, nil
|
return runnerGroups, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListOrganizationRunnerGroupsForRepository returns all the runner groups defined in the organization and
|
||||||
|
// inherited to the organization from an enterprise.
|
||||||
|
// We can remove this when google/go-github library is updated to support this.
|
||||||
|
func (c *Client) ListOrganizationRunnerGroupsForRepository(ctx context.Context, org, repo string) ([]*github.RunnerGroup, error) {
|
||||||
|
var runnerGroups []*github.RunnerGroup
|
||||||
|
|
||||||
|
opts := github.ListOptions{PerPage: 100}
|
||||||
|
for {
|
||||||
|
list, res, err := c.listOrganizationRunnerGroupsVisibleToRepo(ctx, org, repo, &opts)
|
||||||
|
if err != nil {
|
||||||
|
return runnerGroups, fmt.Errorf("failed to list organization runner groups: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
runnerGroups = append(runnerGroups, list.RunnerGroups...)
|
||||||
|
if res.NextPage == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
opts.Page = res.NextPage
|
||||||
|
}
|
||||||
|
|
||||||
|
return runnerGroups, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) ListRunnerGroupRepositoryAccesses(ctx context.Context, org string, runnerGroupId int64) ([]*github.Repository, error) {
|
||||||
|
var repos []*github.Repository
|
||||||
|
|
||||||
|
opts := github.ListOptions{PerPage: 100}
|
||||||
|
for {
|
||||||
|
list, res, err := c.Client.Actions.ListRepositoryAccessRunnerGroup(ctx, org, runnerGroupId, &opts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to list repository access for runner group: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
repos = append(repos, list.Repositories...)
|
||||||
|
if res.NextPage == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
opts.Page = res.NextPage
|
||||||
|
}
|
||||||
|
|
||||||
|
return repos, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// listOrganizationRunnerGroupsVisibleToRepo lists all self-hosted runner groups configured in an organization which can be used by the repository.
|
||||||
|
//
|
||||||
|
// GitHub API docs: https://docs.github.com/en/rest/reference/actions#list-self-hosted-runner-groups-for-an-organization
|
||||||
|
func (c *Client) listOrganizationRunnerGroupsVisibleToRepo(ctx context.Context, org, repo string, opts *github.ListOptions) (*github.RunnerGroups, *github.Response, error) {
|
||||||
|
repoName := repo
|
||||||
|
parts := strings.Split(repo, "/")
|
||||||
|
if len(parts) == 2 {
|
||||||
|
repoName = parts[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
u := fmt.Sprintf("orgs/%v/actions/runner-groups?visible_to_repository=%v", org, repoName)
|
||||||
|
|
||||||
|
if opts != nil {
|
||||||
|
if opts.PerPage > 0 {
|
||||||
|
u = fmt.Sprintf("%v&per_page=%v", u, opts.PerPage)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Page > 0 {
|
||||||
|
u = fmt.Sprintf("%v&page=%v", u, opts.Page)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := c.Client.NewRequest("GET", u, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
groups := &github.RunnerGroups{}
|
||||||
|
resp, err := c.Client.Do(ctx, req, &groups)
|
||||||
|
if err != nil {
|
||||||
|
return nil, resp, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return groups, resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
// cleanup removes expired registration tokens.
|
// cleanup removes expired registration tokens.
|
||||||
func (c *Client) cleanup() {
|
func (c *Client) cleanup() {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
@@ -480,12 +514,3 @@ func (r *Client) IsRunnerBusy(ctx context.Context, enterprise, org, repo, name s
|
|||||||
|
|
||||||
return false, &RunnerNotFound{runnerName: name}
|
return false, &RunnerNotFound{runnerName: name}
|
||||||
}
|
}
|
||||||
|
|
||||||
func containsString(list []string, value string) bool {
|
|
||||||
for _, item := range list {
|
|
||||||
if item == value {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -152,3 +152,10 @@ func TestCleanup(t *testing.T) {
|
|||||||
t.Errorf("expired token still exists")
|
t.Errorf("expired token still exists")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUserAgent(t *testing.T) {
|
||||||
|
client := newTestClient()
|
||||||
|
if client.UserAgent != "actions-runner-controller" {
|
||||||
|
t.Errorf("UserAgent should be set to actions-runner-controller")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user