feat: Support for scaling from/to zero (#465)

This is an attempt to support scaling from/to zero.

The basic idea is that we create a one-off "registration-only" runner pod on RunnerReplicaSet being scaled to zero, so that there is one "offline" runner, which enables GitHub Actions to queue jobs instead of discarding those.

GitHub Actions seems to immediately throw away the new job when there are no runners at all. Generally, having runners of any status, `busy`, `idle`, or `offline` would prevent GitHub actions from failing jobs. But retaining `busy` or `idle` runners means that we need to keep runner pods running, which conflicts with our desired to scale to/from zero, hence we retain `offline` runners.

In this change, I enhanced the runnerreplicaset controller to create a registration-only runner on very beginning of its reconciliation logic, only when a runnerreplicaset is scaled to zero. The runner controller creates the registration-only runner pod, waits for it to become "offline", and then removes the runner pod. The runner on GitHub stays `offline`, until the runner resource on K8s is deleted. As we remove the registration-only runner pod as soon as it registers, this doesn't block cluster-autoscaler.

Related to #447
This commit is contained in:
Yusuke Kuoka
2021-05-02 16:11:36 +09:00
committed by GitHub
parent 7e766282aa
commit dbd7b486d2
10 changed files with 302 additions and 50 deletions

View File

@@ -113,7 +113,7 @@ generate: controller-gen
$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."
# Build the docker image
docker-build: test
docker-build:
docker build . -t ${NAME}:${VERSION}
docker build runner -t ${RUNNER_NAME}:${VERSION} --build-arg TARGETPLATFORM=$(shell arch)
@@ -172,7 +172,7 @@ acceptance/pull: docker-build
acceptance/setup:
kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v1.0.4/cert-manager.yaml #kubectl create namespace actions-runner-system
kubectl -n cert-manager wait deploy/cert-manager-cainjector --for condition=available --timeout 60s
kubectl -n cert-manager wait deploy/cert-manager-cainjector --for condition=available --timeout 90s
kubectl -n cert-manager wait deploy/cert-manager-webhook --for condition=available --timeout 60s
kubectl -n cert-manager wait deploy/cert-manager --for condition=available --timeout 60s
kubectl create namespace actions-runner-system || true
@@ -230,6 +230,7 @@ OS_NAME := $(shell uname -s | tr A-Z a-z)
# find or download etcd
etcd:
ifeq (, $(shell which etcd))
ifeq (, $(wildcard $(TEST_ASSETS)/etcd))
@{ \
set -xe ;\
@@ -247,9 +248,13 @@ ETCD_BIN=$(TEST_ASSETS)/etcd
else
ETCD_BIN=$(TEST_ASSETS)/etcd
endif
else
ETCD_BIN=$(shell which etcd)
endif
# find or download kube-apiserver
kube-apiserver:
ifeq (, $(shell which kube-apiserver))
ifeq (, $(wildcard $(TEST_ASSETS)/kube-apiserver))
@{ \
set -xe ;\
@@ -267,10 +272,13 @@ KUBE_APISERVER_BIN=$(TEST_ASSETS)/kube-apiserver
else
KUBE_APISERVER_BIN=$(TEST_ASSETS)/kube-apiserver
endif
else
KUBE_APISERVER_BIN=$(shell which kube-apiserver)
endif
# find or download kubectl
kubectl:
ifeq (, $(shell which kubectl))
ifeq (, $(wildcard $(TEST_ASSETS)/kubectl))
@{ \
set -xe ;\
@@ -288,3 +296,6 @@ KUBECTL_BIN=$(TEST_ASSETS)/kubectl
else
KUBECTL_BIN=$(TEST_ASSETS)/kubectl
endif
else
KUBECTL_BIN=$(shell which kubectl)
endif