mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-11 03:57:01 +00:00
Add DrainJobsMode (aka UpdateStrategy feature) (#2569)
This commit is contained in:
@@ -23,6 +23,14 @@ inputs:
|
||||
# Namespace passed to `kubectl logs` when the controller logs are gathered.
arc-controller-namespace:
  description: 'The namespace of the configured gha-runner-scale-set-controller'
  required: true
# Gates the "Wait for workflow to finish successfully" and "cleanup" steps.
# Optional: callers that omit it get the default below.
wait-to-finish:
  description: 'Wait for the workflow run to finish'
  required: false
  default: "true"
# Gates the "Wait for workflow to start running" step, which additionally
# requires wait-to-finish to be "false".
# Optional: callers that omit it get the default below.
wait-to-running:
  description: 'Wait for the workflow run to start running'
  required: false
  default: "false"
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -118,7 +126,36 @@ runs:
|
||||
| ${{steps.query_workflow.outputs.workflow_run_url}} |
|
||||
EOF
|
||||
|
||||
# Polls the triggered workflow run until GitHub reports it as `in_progress`.
# Runs only when the caller wants to wait for the run to start but not for it
# to finish. Fails the step if the run has not started after the last poll.
- name: Wait for workflow to start running
  if: inputs.wait-to-running == 'true' && inputs.wait-to-finish == 'false'
  uses: actions/github-script@v6
  with:
    script: |
      function sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms))
      }
      const owner = '${{inputs.repo-owner}}'
      const repo = '${{inputs.repo-name}}'
      const workflow_run_id = ${{steps.query_workflow.outputs.workflow_run}}
      // At most 10 polls, 30 seconds apart; the first poll happens after
      // the first sleep.
      let count = 0
      while (count++ < 10) {
        await sleep(30 * 1000);
        const getRunResponse = await github.rest.actions.getWorkflowRun({
          owner: owner,
          repo: repo,
          run_id: workflow_run_id
        })
        console.log(`${getRunResponse.data.html_url}: ${getRunResponse.data.status} (${getRunResponse.data.conclusion})`);
        if (getRunResponse.data.status === 'in_progress') {
          console.log(`Workflow run is in progress.`)
          return
        }
      }
      core.setFailed(`The triggered workflow run didn't start properly using ${{inputs.arc-name}}`)
|
||||
|
||||
- name: Wait for workflow to finish successfully
|
||||
if: inputs.wait-to-finish == 'true'
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
@@ -151,10 +188,15 @@ runs:
|
||||
}
|
||||
core.setFailed(`The triggered workflow run didn't finish properly using ${{inputs.arc-name}}`)
|
||||
|
||||
# Uninstalls the runner scale set release once the triggered workflow has been
# awaited to completion, then waits for its AutoScalingRunnerSet to be deleted.
- name: cleanup
  if: inputs.wait-to-finish == 'true'
  shell: bash
  run: |
    helm uninstall "${{ inputs.arc-name }}" --namespace "${{ inputs.arc-namespace }}" --debug
    # Wait in the namespace the release was uninstalled from; the release name
    # is only used for the instance label selector.
    kubectl wait --timeout=10s --for=delete AutoScalingRunnerSet -n "${{ inputs.arc-namespace }}" -l app.kubernetes.io/instance="${{ inputs.arc-name }}"
|
||||
|
||||
# Always prints the controller logs, even when an earlier step failed.
# This step does not uninstall the release: the `cleanup` step does that when
# wait-to-finish is 'true', and otherwise the calling job still needs the
# scale set after this action returns.
- name: Gather logs and cleanup
  shell: bash
  if: always()
  run: |
    kubectl logs deployment/arc-gha-runner-scale-set-controller -n ${{inputs.arc-controller-namespace}}
|
||||
.github/workflows/gha-e2e-tests.yaml (vendored, 170 lines changed)
@@ -710,3 +710,173 @@ jobs:
|
||||
arc-name: ${{steps.install_arc.outputs.ARC_NAME}}
|
||||
arc-namespace: "arc-runners"
|
||||
arc-controller-namespace: "arc-systems"
|
||||
|
||||
# E2E test for the controller installed with flags.updateStrategy="eventual".
# Flow: install controller and scale set, trigger long-running jobs, upgrade
# the scale set while they run, assert the listener pod goes away, then assert
# it comes back.
update-strategy-tests:
  runs-on: ubuntu-latest
  timeout-minutes: 20
  # Skipped for pull requests whose head repository is not this repository.
  if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
  env:
    WORKFLOW_FILE: "arc-test-sleepy-matrix.yaml"
  steps:
    - uses: actions/checkout@v3
      with:
        ref: ${{github.head_ref}}

    - uses: ./.github/actions/setup-arc-e2e
      id: setup
      with:
        app-id: ${{secrets.E2E_TESTS_ACCESS_APP_ID}}
        app-pk: ${{secrets.E2E_TESTS_ACCESS_PK}}
        image-name: ${{env.IMAGE_NAME}}
        image-tag: ${{env.IMAGE_VERSION}}
        target-org: ${{env.TARGET_ORG}}

    - name: Install gha-runner-scale-set-controller
      id: install_arc_controller
      run: |
        helm install arc \
          --namespace "arc-systems" \
          --create-namespace \
          --set image.repository=${{ env.IMAGE_NAME }} \
          --set image.tag=${{ env.IMAGE_VERSION }} \
          --set flags.updateStrategy="eventual" \
          ./charts/gha-runner-scale-set-controller \
          --debug
        # Poll once per second, up to 60 times, until the controller pod exists.
        count=0
        while true; do
          POD_NAME=$(kubectl get pods -n arc-systems -l app.kubernetes.io/name=gha-runner-scale-set-controller -o name)
          if [ -n "$POD_NAME" ]; then
            echo "Pod found: $POD_NAME"
            break
          fi
          if [ "$count" -ge 60 ]; then
            echo "Timeout waiting for controller pod with label app.kubernetes.io/name=gha-runner-scale-set-controller"
            exit 1
          fi
          sleep 1
          count=$((count+1))
        done
        kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l app.kubernetes.io/name=gha-runner-scale-set-controller
        kubectl get pod -n arc-systems
        kubectl describe deployment arc-gha-runner-scale-set-controller -n arc-systems

    - name: Install gha-runner-scale-set
      id: install_arc
      run: |
        # Release name: job id plus a minute/second stamp and a number in 1..100.
        ARC_NAME=${{github.job}}-$(date +'%M%S')$((($RANDOM + 100) % 100 + 1))
        helm install "$ARC_NAME" \
          --namespace "arc-runners" \
          --create-namespace \
          --set githubConfigUrl="https://github.com/${{ env.TARGET_ORG }}/${{env.TARGET_REPO}}" \
          --set githubConfigSecret.github_token="${{ steps.setup.outputs.token }}" \
          ./charts/gha-runner-scale-set \
          --debug
        echo "ARC_NAME=$ARC_NAME" >> "$GITHUB_OUTPUT"
        # Poll once per second, up to 60 times, until the listener pod for this
        # scale set shows up in the controller namespace.
        count=0
        while true; do
          POD_NAME=$(kubectl get pods -n arc-systems -l "actions.github.com/scale-set-name=$ARC_NAME" -o name)
          if [ -n "$POD_NAME" ]; then
            echo "Pod found: $POD_NAME"
            break
          fi
          if [ "$count" -ge 60 ]; then
            echo "Timeout waiting for listener pod with label actions.github.com/scale-set-name=$ARC_NAME"
            exit 1
          fi
          sleep 1
          count=$((count+1))
        done
        kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l "actions.github.com/scale-set-name=$ARC_NAME"
        kubectl get pod -n arc-systems

    - name: Trigger long running jobs and wait for runners to pick them up
      uses: ./.github/actions/execute-assert-arc-e2e
      timeout-minutes: 10
      with:
        auth-token: ${{ steps.setup.outputs.token }}
        repo-owner: ${{ env.TARGET_ORG }}
        repo-name: ${{env.TARGET_REPO}}
        workflow-file: ${{env.WORKFLOW_FILE}}
        arc-name: ${{steps.install_arc.outputs.ARC_NAME}}
        arc-namespace: "arc-runners"
        arc-controller-namespace: "arc-systems"
        # Return as soon as the run is in progress; the scale set must stay
        # installed for the upgrade and assertions below.
        wait-to-running: "true"
        wait-to-finish: "false"

    - name: Upgrade the gha-runner-scale-set
      shell: bash
      run: |
        # --set paths containing [N] or {...} are single-quoted so bash passes
        # them to helm verbatim (no globbing or brace handling).
        helm upgrade --install "${{ steps.install_arc.outputs.ARC_NAME }}" \
          --namespace "arc-runners" \
          --create-namespace \
          --set githubConfigUrl="https://github.com/${{ env.TARGET_ORG }}/${{ env.TARGET_REPO }}" \
          --set githubConfigSecret.github_token="${{ steps.setup.outputs.token }}" \
          --set 'template.spec.containers[0].name=runner' \
          --set 'template.spec.containers[0].image=ghcr.io/actions/actions-runner:latest' \
          --set 'template.spec.containers[0].command={/home/runner/run.sh}' \
          --set 'template.spec.containers[0].env[0].name=TEST' \
          --set 'template.spec.containers[0].env[0].value=E2E TESTS' \
          ./charts/gha-runner-scale-set \
          --debug

    - name: Assert that the listener is deleted while jobs are running
      shell: bash
      run: |
        # Passes as soon as no Running listener pod is left for this scale set;
        # fails after 60 one-second polls. RUNNERS_COUNT is only reported for
        # diagnostics and does not affect the result.
        count=0
        while true; do
          LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name=${{ steps.install_arc.outputs.ARC_NAME }} -n arc-systems --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
          RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n arc-runners --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
          RESOURCES="$(kubectl get pods -A)"

          if [ "$LISTENER_COUNT" -eq 0 ]; then
            echo "Listener has been deleted"
            echo "$RESOURCES"
            exit 0
          fi
          if [ "$count" -ge 60 ]; then
            echo "Timeout waiting for listener to be deleted"
            echo "$RESOURCES"
            exit 1
          fi

          echo "Waiting for listener to be deleted"
          echo "Listener count: $LISTENER_COUNT target: 0 | Runners count: $RUNNERS_COUNT target: 3"

          sleep 1
          count=$((count+1))
        done

    - name: Assert that the listener goes back up after the jobs are done
      shell: bash
      run: |
        # Passes as soon as exactly one Running listener pod exists again;
        # fails after 120 one-second polls. RUNNERS_COUNT is only reported for
        # diagnostics and does not affect the result.
        count=0
        while true; do
          LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name=${{ steps.install_arc.outputs.ARC_NAME }} -n arc-systems --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
          RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n arc-runners --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
          RESOURCES="$(kubectl get pods -A)"

          if [ "$LISTENER_COUNT" -eq 1 ]; then
            echo "Listener is up!"
            echo "$RESOURCES"
            exit 0
          fi
          if [ "$count" -ge 120 ]; then
            echo "Timeout waiting for listener to be recreated"
            echo "$RESOURCES"
            exit 1
          fi

          echo "Waiting for listener to be recreated"
          echo "Listener count: $LISTENER_COUNT target: 1 | Runners count: $RUNNERS_COUNT target: 0"

          sleep 1
          count=$((count+1))
        done

    - name: Gather logs and cleanup
      shell: bash
      if: always()
      run: |
        # Keep going after a failed teardown so the controller logs are still
        # printed; the failure is reported through the exit status at the end.
        status=0
        helm uninstall "${{ steps.install_arc.outputs.ARC_NAME }}" --namespace "arc-runners" --debug || status=$?
        # The release lives in "arc-runners"; the release name is only used
        # for the instance label selector.
        kubectl wait --timeout=10s --for=delete AutoScalingRunnerSet -n "arc-runners" -l app.kubernetes.io/instance="${{ steps.install_arc.outputs.ARC_NAME }}" || status=$?
        kubectl logs deployment/arc-gha-runner-scale-set-controller -n "arc-systems" || status=$?
        exit "$status"
|
||||
Reference in New Issue
Block a user