Compare commits

...

8 Commits

Author SHA1 Message Date
Callum Tait
e4e0b45933 chore: bump helm chart to latest app version (#862) 2021-10-02 10:05:06 +01:00
Renovate Bot
2937173101 chore(deps): update dependency actions/runner to v2.283.2 2021-09-30 15:01:11 +00:00
Aidan
fccf29970b Fix bug related to label matching. (#852)
* Fix bug related to label matching.

Add start of test framework for Workflow Job Events

Signed-off-by: Aidan Jensen <aidan@artificial.com>
Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-09-30 11:02:59 +09:00
Alex Kulikovskikh
ea06001819 fix: scaling issue based on workflow_job event (#850)
This PR fixes the scaling issue based on the `workflow_job` event discussed in #819
2021-09-30 10:36:59 +09:00
Yusuke Kuoka
1bc1712519 Do bump go-github in codebase to fix build error in CI builds (#853) 2021-09-30 10:35:24 +09:00
Callum Tait
24224613f3 docs: updating to reflect the latest release (#844)
* docs: updating to reflect the latest release

* docs: further updates

* docs: format updates

* docs: correcting title sizes

* docs: correcting links

* docs: correcting the grammar of multi controllers

* docs: slightly less awkward English
2021-09-28 09:44:15 +01:00
Rob Bos
3f331e9a39 Fixing capitalization and a typo (#838)
* Fixing capitalization and a typo

* typo

* Typo

* Update controllers/autoscaling.go

* Update controllers/autoscaling.go

Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-09-26 14:34:55 +09:00
Yusuke Kuoka
67c7b7a228 Bump chart version to 0.13.1 with controller 0.20.1 2021-09-24 00:40:08 +00:00
11 changed files with 708 additions and 81 deletions

View File

@@ -19,7 +19,7 @@ on:
   - .github/workflows/build-and-release-runners.yml
 env:
-  RUNNER_VERSION: 2.283.1
+  RUNNER_VERSION: 2.283.2
   DOCKER_VERSION: 20.10.8
   DOCKERHUB_USERNAME: summerwind

View File

@@ -4,7 +4,7 @@
 Depending on what you are patching depends on how you should go about it. Below are some guides on how to test patches locally as well as develop the controller and runners.
-When sumitting a PR for a change please provide evidence that your change works as we still need to work on improving the CI of the project. Some resources are provided for helping achieve this, see this guide for details.
+When submitting a PR for a change please provide evidence that your change works as we still need to work on improving the CI of the project. Some resources are provided for helping achieve this, see this guide for details.
 #### Running an End to End Test

136
README.md
View File

@@ -20,7 +20,7 @@ ToC:
 - [Runner Deployments](#runnerdeployments)
 - [Note on scaling to/from 0](#note-on-scaling-tofrom-0)
 - [Autoscaling](#autoscaling)
-- [Faster Autoscaling with GitHub Webhook](#faster-autoscaling-with-github-webhook)
+- [Webhook Driven Scaling](#webhook-driven-scaling)
 - [Autoscaling to/from 0](#autoscaling-tofrom-0)
 - [Scheduled Overrides](#scheduled-overrides)
 - [Runner with DinD](#runner-with-dind)
@@ -51,8 +51,8 @@ Subsequent to this, install the custom resource definitions and actions-runner-c
 **Kubectl Deployment:**
 ```shell
-# REPLACE "v0.18.2" with the version you wish to deploy
-kubectl apply -f https://github.com/actions-runner-controller/actions-runner-controller/releases/download/v0.18.2/actions-runner-controller.yaml
+# REPLACE "v0.20.1" with the version you wish to deploy
+kubectl apply -f https://github.com/actions-runner-controller/actions-runner-controller/releases/download/v0.20.1/actions-runner-controller.yaml
 ```
 **Helm Deployment:**
@@ -101,7 +101,7 @@ _Note: Links are provided further down to create an app for your logged in user
 * Actions (read)
 * Administration (read / write)
-* Checks (read) (if you are going to use [Faster Autoscaling with GitHub Webhook](#faster-autoscaling-with-github-webhook))
+* Checks (read) (if you are going to use [Webhook Driven Scaling](#webhook-driven-scaling))
 * Metadata (read)
 **Required Permissions for Organization Runners:**<br />
@@ -114,7 +114,7 @@ _Note: Links are provided further down to create an app for your logged in user
 * Self-hosted runners (read / write)
 **Subscribe to events**
-* Check run (if you are going to use [Faster Autoscaling with GitHub Webhook](#faster-autoscaling-with-github-webhook))
+* Check run (if you are going to use [Webhook Driven Scaling](#webhook-driven-scaling))
 _Note: All API routes mapped to their permissions can be found [here](https://docs.github.com/en/rest/reference/permissions-required-for-github-apps) if you wish to review_
@@ -214,9 +214,9 @@ Configure your values.yaml, see the chart's [README](./charts/actions-runner-con
 By default the controller will look for runners in all namespaces, the watch namespace feature allows you to restrict the controller to monitoring a single namespace. This then lets you deploy multiple controllers in a single cluster. You may want to do this either because you wish to scale beyond the API rate limit of a single PAT / GitHub App configuration or you wish to support multiple GitHub organizations with runners installed at the organization level in a single cluster.
-This feature is configured via the controller `--watch-namespace` flag. When a namespace is provided via this flag the controller will only monitor runners in that namespace.
+This feature is configured via the controller's `--watch-namespace` flag. When a namespace is provided via this flag, the controller will only monitor runners in that namespace.
-If you plan on installing all instances of the controller stack into a single namespace you will need to make the names of the resources are unique for each stack. In the case of Helm this can be done by giving each a unique release name, or via the `fullnameOverride` properties.
+If you plan on installing all instances of the controller stack into a single namespace you will need to make the names of the resources unique to each stack. In the case of Helm this can be done by giving each install a unique release name, or via the `fullnameOverride` properties.
 Alternatively, you can install each controller stack into its own unique namespace (relative to other controller stacks in the cluster), avoiding the need to uniquely prefix resources.
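If you manage the controller Deployment manifest directly rather than via the chart, a rough sketch of passing the flag might look like the following; the container name, image tag, and namespace value are assumptions and not taken from this compare view:

```yaml
# Hypothetical excerpt of a controller Deployment; only the --watch-namespace argument is the point here.
spec:
  template:
    spec:
      containers:
      - name: manager                        # assumed container name
        image: summerwind/actions-runner-controller:v0.20.1
        args:
        - "--watch-namespace=runners-team-a" # this instance only monitors runners in this namespace
```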
@@ -374,13 +374,11 @@ This, in combination with a correctly configured HorizontalRunnerAutoscaler, all
 ### Autoscaling
-__**IMPORTANT : Due to limitations / a bug with GitHub's [routing engine](https://docs.github.com/en/actions/hosting-your-own-runners/using-self-hosted-runners-in-a-workflow#routing-precedence-for-self-hosted-runners) autoscaling does NOT work correctly with RunnerDeployments that target the enterprise level. Scaling activity works as expected however jobs fail to get assigned to the scaled out replicas. This was explored in issue [#470](https://github.com/actions-runner-controller/actions-runner-controller/issues/470). Once GitHub resolves the issue with their backend service we expect the solution to be able to support autoscaled enterprise runnerdeploments without any additional changes.**__
-**NOTE: Once `workflow_job` webhook events are released on GitHub, the webhook-based autoscaling is the preferred way of autoscaling, because it is easy to configure and has the ability to accurately detect which runners to scale. See [Example 3: Scale on each `workflow_job` event](#example-3-scale-on-each-workflow_job-event)**
-A `RunnerDeployment` (excluding enterprise runners) can scale the number of runners between `minReplicas` and `maxReplicas` fields based the chosen scaling metric as defined in the `metrics` attribute
-**Scaling Metrics**
+A `RunnerDeployment` can scale the number of runners between `minReplicas` and `maxReplicas` fields on either a pull based scaling metric as defined in the `metrics` attribute or a webhook event. Since GitHub's release of the [`workflow_job` webhook](https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job), webhook-based autoscaling is the preferred way of autoscaling; it offers more accurate and quicker scaling compared to the pull based metrics and is easy to set up.
+The below section covers the pull based metrics which you may want to consider if your scaling demands are minor. To configure your webhook based scaling see the [Webhook Driven Scaling](#webhook-driven-scaling) section.
+**Pull Driven Scaling Metrics**
 **TotalNumberOfQueuedAndInProgressWorkflowRuns**
@@ -405,7 +403,7 @@ The scale out performance is controlled via the manager containers startup `--sy
 Example `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
-_Important!!! We no longer include the attribute `replicas` in our `RunnerDeployment` if we are configuring autoscaling!_
+**_Important!!! We no longer include the attribute `replicas` in our `RunnerDeployment` if we are configuring autoscaling!_**
 ```yaml
 apiVersion: actions.summerwind.dev/v1alpha1
@@ -460,7 +458,7 @@ The `HorizontalRunnerAutoscaler` will poll GitHub based on the configuration syn
 Examples of each scaling type implemented with a `RunnerDeployment` backed by a `HorizontalRunnerAutoscaler`:
-_Important!!! We no longer include the attribute `replicas` in our `RunnerDeployment` if we are configuring autoscaling!_
+**_Important!!! We no longer include the attribute `replicas` in our `RunnerDeployment` if we are configuring autoscaling!_**
 ```yaml
 ---
@@ -507,17 +505,12 @@ spec:
   scaleDownDelaySecondsAfterScaleOut: 60
 ```
-#### Faster Autoscaling with GitHub Webhook
-__**IMPORTANT : Due to missing webhook events, webhook based scaling is not available for enterprise level RunnerDeployments. This was explored in issue [#470](https://github.com/actions-runner-controller/actions-runner-controller/issues/470).**__
-> This feature is an ADVANCED feature which may require more work to set up.
-> Please get prepared to put some time and effort to learn and leverage this feature!
+#### Webhook Driven Scaling
 `actions-runner-controller` has an optional Webhook server that receives GitHub Webhook events and scale
 [`RunnerDeployments`](#runnerdeployments) by updating corresponding [`HorizontalRunnerAutoscalers`](#autoscaling).
-Today, the Webhook server can be configured to respond GitHub `check_run`, `pull_request`, and `push` events
+Today, the Webhook server can be configured to respond to GitHub `check_run`, `workflow_job`, `pull_request` and `push` events
 by scaling up the matching `HorizontalRunnerAutoscaler` by N replica(s), where `N` is configurable within
 `HorizontalRunnerAutoscaler's` `Spec`.
@@ -541,7 +534,7 @@ spec:
 With the above example, the webhook server scales `myrunners` by `1` replica for 5 minutes on each `check_run` event
 with the type of `created` and the status of `queued` received.
-The primary benefit of autoscaling on Webhook compared to the standard autoscaling is that this one allows you to
+The primary benefit of autoscaling on Webhook compared to the standard autoscaling is that it is far quicker as it allows you to
 immediately add "resource slack" for future GitHub Actions job runs.
 In contrast, the standard autoscaling requires you to wait next sync period to add
@@ -552,7 +545,8 @@ But doing so eventually results in the controller not being functional due to it
 To enable this feature, you firstly need to install the webhook server.
-Currently, only our Helm chart has the ability install it.
+Currently, only our Helm chart has the ability to install it:
+_[see the values documentation for all configuration options](https://github.com/actions-runner-controller/actions-runner-controller/blob/master/charts/actions-runner-controller/README.md)_
 ```console
 $ helm --upgrade install actions-runner-controller/actions-runner-controller \
@@ -568,10 +562,44 @@ Once you were able to confirm that the Webhook server is ready and running from
 GitHub sending PING events to the Webhook server - create or update your `HorizontalRunnerAutoscaler` resources
 by learning the following configuration examples.
-- [Example 1: Scale up on each `check_run` event](#example-1-scale-up-on-each-check_run-event)
-- [Example 2: Scale on each `pull_request` event against `develop` or `main` branches](#example-2-scale-on-each-pull_request-event-against-develop-or-main-branches)
+- [Example 1: Scale on each `workflow_job` event](#example-1-scale-on-each-workflow_job-event)
+- [Example 2: Scale up on each `check_run` event](#example-2-scale-up-on-each-check_run-event)
+- [Example 3: Scale on each `pull_request` event against a given set of branches](#example-3-scale-on-each-pull_request-event-against-a-given-set-of-branches)
-##### Example 1: Scale up on each `check_run` event
+##### Example 1: Scale on each `workflow_job` event
> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
The most flexible webhook GitHub offers is the `workflow_job` webhook; it includes the `runs-on` information in the payload, allowing scaling based on runner labels.
This webhook should cover most people's needs, so please experiment with this webhook before considering the others.
```yaml
kind: RunnerDeployment
metadata:
  name: myrunners
spec:
  repository: example/myrepo
---
kind: HorizontalRunnerAutoscaler
spec:
  scaleTargetRef:
    name: myrunners
  scaleUpTriggers:
  - githubEvent: {}
    duration: "30m"
```
You can configure your GitHub webhook settings to only include `Workflows Job` events, so that it sends us three kinds of `workflow_job` events per job run.
Each kind has a `status` of `queued`, `in_progress` and `completed`.
With the above configuration, `actions-runner-controller` adds one runner for a `workflow_job` event whose `status` is `queued`. Similarly, it removes one runner for a `workflow_job` event whose `status` is `completed`.
Beware that a scale-down after a scale-up is deferred until `scaleDownDelaySecondsAfterScaleOut` elapses. Let's say you had configured a `scaleDownDelaySecondsAfterScaleOut` of 60 seconds: 2 consecutive workflow jobs will result in immediately adding 2 runners, and the 2 runners are removed only after 60 seconds. This basically gives you a 60 second "grace period" that makes it possible for self-hosted runners to immediately run additional workflow jobs enqueued in that 60 seconds.
You must not include `spec.metrics` like `PercentageRunnersBusy` when using this feature, as it is unnecessary. That is, if you've configured the webhook for `workflow_job`, it should be enough for all your scale-out needs.
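As a sketch of the grace period described above; only `scaleTargetRef`, `scaleUpTriggers`, and `scaleDownDelaySecondsAfterScaleOut` are taken from this compare view, the metadata name and values are illustrative:

```yaml
kind: HorizontalRunnerAutoscaler
metadata:
  name: myrunners-autoscaler               # hypothetical name
spec:
  scaleTargetRef:
    name: myrunners
  scaleDownDelaySecondsAfterScaleOut: 60   # runners added on a webhook event stay for at least 60 seconds
  scaleUpTriggers:
  - githubEvent: {}                        # matches the workflow_job configuration shown in Example 1
    duration: "30m"
```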
##### Example 2: Scale up on each `check_run` event
 > Note: This should work almost like https://github.com/philips-labs/terraform-aws-github-runner
@@ -615,13 +643,15 @@ spec:
       checkRun:
         types: ["created"]
         status: "queued"
-        # allow only certain repositories within your organization to trigger autoscaling
+        # Optionally restrict autoscaling to being triggered by events from specific repositories within your organization still
         # repositories: ["myrepo", "myanotherrepo"]
     amount: 1
     duration: "5m"
 ```
-###### Example 2: Scale on each `pull_request` event against `develop` or `main` branches
+##### Example 3: Scale on each `pull_request` event against a given set of branches
+To scale up replicas of the runners for `example/myrepo` by 1 for 5 minutes on each `pull_request` against the `main` or `develop` branch you write manifests like the below:
 ```yaml
 kind: RunnerDeployment
@@ -645,34 +675,6 @@ spec:
See ["activity types"](https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request) for the list of valid values for `scaleUpTriggers[].githubEvent.pullRequest.types`. See ["activity types"](https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request) for the list of valid values for `scaleUpTriggers[].githubEvent.pullRequest.types`.
###### Example 3: Scale on each `workflow_job` event
> This feature depends on an unreleased GitHub feature
```yaml
kind: RunnerDeployment
metadata:
  name: myrunners
spec:
  repository: example/myrepo
---
kind: HorizontalRunnerAutoscaler
spec:
  scaleTargetRef:
    name: myrunners
  scaleUpTriggers:
  - githubEvent: {}
    duration: "30m"
```
You can configure your GitHub webhook settings to only include `Workflows Job` events, so that it sends us three kinds of `workflow_job` events per a job run.
Each kind has a `status` of `queued`, `in_progress` and `completed`.
With the above configuration, `actions-runner-controller` adds one runner for a `workflow_job` event whose `status` is `queued`. Similarly, it removes one runner for a `workflow_job` event whose `status` is `completed`.
Beware that a scale-down after a scale-up is deferred until `scaleDownDelaySecondsAfterScaleOut` elapses. Let's say you had configured `scaleDownDelaySecondsAfterScaleOut` of 60 seconds, 2 consequtive workflow jobs will result in immediately adding 2 runners. The 2 runners are removed only after 60 seconds. This basically gives you 60 seconds of a "grace period" that makes it possible for self-hosted runners to immediately run additional workflow jobs enqueued in that 60 seconds.
You must not include `spec.metrics` like `PercentageRunnersBusy` when using this feature, as it is unnecessary. That is, if you've configured the webhook for `workflow_job`, it should be enough for all the scale-out need.
 #### Autoscaling to/from 0
@@ -680,17 +682,16 @@ You must not include `spec.metrics` like `PercentageRunnersBusy` when using this
 Previously, we've discussed about [how to scale a RunnerDeployment to/from 0](#note-on-scaling-tofrom-0)
-To automate the process of scaling to/from 0, you can use `HorizontalRunnerAutoscaler` with a caveat.
-That is, you need to choose one of the following configuration for metrics and triggers:
+To scale from 0 whilst still being able to provision runners as jobs are queued we must use the `HorizontalRunnerAutoscaler` with only certain metric configurations; only the below configurations support scaling from 0 whilst also being able to provision runners as jobs are queued:
 - `TotalNumberOfQueuedAndInProgressWorkflowRuns`
 - `PercentageRunnersBusy` + `TotalNumberOfQueuedAndInProgressWorkflowRuns`
 - `PercentageRunnersBusy` + Webhook-based autoscaling
+- Webhook-based autoscaling only
-This is due to that `PercentageRunnersBusy`, by its definition, needs one or more GitHub runners that can become `busy`, which cannot happen at all when you have 0 active runners.
+This is because `PercentageRunnersBusy`, by its definition, needs one or more GitHub runners that can become `busy` to be able to scale. If there isn't a runner to pick up a job and enter a `busy` state then the controller will never know to provision a runner to begin with, as this metric has no knowledge of the jobs queued and relies on the number of busy runners as a means for calculating the desired replica count.
-If and only if HorizontalRunnerAutoscaler is configured to have a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` and the controller sees the primary metric of `PercentageRunnersBusy` returned 0 desired replicas, it uses the secondary metric for calculating the desired replicas once again.
+If a HorizontalRunnerAutoscaler is configured with a secondary metric of `TotalNumberOfQueuedAndInProgressWorkflowRuns` then be aware that the controller will check the primary metric of `PercentageRunnersBusy` first and will only use the secondary metric to calculate the desired replica count if the primary metric returns 0 desired replicas.
 A correctly configured `TotalNumberOfQueuedAndInProgressWorkflowRuns` can return non-zero desired replicas even when there are no runners other than [registration-only runners](#note-on-scaling-tofrom-0), hence the `PercentageRunnersBusy` + `TotalNumberOfQueuedAndInProgressWorkflowRuns` configuration makes scaling from zero possible.
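For illustration, a sketch of the `PercentageRunnersBusy` + `TotalNumberOfQueuedAndInProgressWorkflowRuns` combination described above; the threshold and factor field names are assumed from the upstream documentation of this era, and all values are placeholders:

```yaml
kind: HorizontalRunnerAutoscaler
spec:
  scaleTargetRef:
    name: myrunners
  minReplicas: 0
  maxReplicas: 5
  metrics:
  - type: PercentageRunnersBusy                          # primary metric
    scaleUpThreshold: '0.75'
    scaleDownThreshold: '0.25'
    scaleUpFactor: '2'
    scaleDownFactor: '0.5'
  - type: TotalNumberOfQueuedAndInProgressWorkflowRuns   # secondary, consulted when the primary suggests 0 replicas
    repositoryNames:
    - example/myrepo
```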
@@ -700,12 +701,10 @@ Similarly, Webhook-based autoscaling works regardless of there are active runner
 > This feature requires controller version => [v0.19.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.19.0)
-`Scheduled Overrides` allows you to configure HorizontalRunnerAutoscaler so that its Spec gets updated only during a certain period of time.
-usually, this feature is used for following scenarios:
-- You want to pay for your infrastructure cost running runners only in business hours
-- You want to prepare for scheduled spikes in workloads
+`Scheduled Overrides` allows you to configure HorizontalRunnerAutoscaler so that its Spec gets updated only during a certain period of time. This feature is usually used for the following scenarios:
+- You want to reduce your infrastructure costs by scaling your Kubernetes nodes down outside of business hours
+- You want to scale for scheduled spikes in workloads
 For the first scenario, you might consider configuration like the below:
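The configuration the sentence above points to sits outside this hunk. Purely as a hedged sketch, assuming the `scheduledOverrides` fields documented for v0.19.0 and placeholder times, an override that keeps zero runners outside a recurring window might look like:

```yaml
kind: HorizontalRunnerAutoscaler
spec:
  scaleTargetRef:
    name: myrunners
  minReplicas: 1
  maxReplicas: 10
  scheduledOverrides:
  - startTime: "2021-10-01T18:00:00+01:00"   # placeholder window start
    endTime: "2021-10-04T08:00:00+01:00"     # placeholder window end
    recurrenceRule:
      frequency: Weekly
    minReplicas: 0                           # scale the fleet to zero outside business hours
```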
@@ -1021,10 +1020,7 @@ Note that there's no official Istio integration in actions-runner-controller. It
 ### Stateful Runners
-> This is a documentation about a unreleased version of actions-runner-controller.
->
-> It would be great if you could try building the latest controller image following https://github.com/actions-runner-controller/actions-runner-controller#contributing if you are eager to test it early and help
-> developers by reporting any bugs :smile:
+> This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0)
 `actions-runner-controller` supports `RunnerSet` API that let you deploy stateful runners. A stateful runner is designed to be able to store some data persists across GitHub Actions workflow and job runs. You might find it useful, for example, to speed up your docker builds by persisting the docker layer cache.

View File

@@ -15,10 +15,10 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.13.0
+version: 0.13.2
 # Used as the default manager tag value when no tag property is provided in the values.yaml
-appVersion: 0.20.0
+appVersion: 0.20.2
 home: https://github.com/actions-runner-controller/actions-runner-controller

View File

@@ -76,7 +76,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(st scaleTa
 		return nil, nil
 	} else if numMetrics > 2 {
-		return nil, fmt.Errorf("Too many autoscaling metrics configured: It must be 0 to 2, but got %d", numMetrics)
+		return nil, fmt.Errorf("too many autoscaling metrics configured: It must be 0 to 2, but got %d", numMetrics)
 	}
 	primaryMetric := metrics[0]
@@ -93,7 +93,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestDesiredReplicas(st scaleTa
 	case v1alpha1.AutoscalingMetricTypePercentageRunnersBusy:
 		suggested, err = r.suggestReplicasByPercentageRunnersBusy(st, hra, primaryMetric)
 	default:
-		return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", primaryMetric)
+		return nil, fmt.Errorf("validating autoscaling metrics: unsupported metric type %q", primaryMetric)
 	}
 	if err != nil {

View File

@@ -542,7 +542,7 @@ HRA:
 	// Ensure that the RunnerDeployment-managed runners have all the labels requested by the workflow_job.
 	for _, l := range labels {
 		var matched bool
-		for _, l2 := range rd.Spec.Template.Labels {
+		for _, l2 := range rd.Spec.Template.Spec.Labels {
 			if l == l2 {
 				matched = true
 				break
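For reference, the corrected field path `rd.Spec.Template.Spec.Labels` corresponds to `spec.template.spec.labels` in a `RunnerDeployment` manifest. A minimal sketch, with the organization and label taken from the test fixture below and everything else assumed:

```yaml
kind: RunnerDeployment
metadata:
  name: myrunners
spec:
  template:
    spec:
      organization: MYORG
      labels:
      - label1    # the workflow_job's requested labels are matched against this list
```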

View File

@@ -114,6 +114,145 @@ func TestWebhookPing(t *testing.T) {
 	)
 }
func TestWebhookWorkflowJob(t *testing.T) {
setupTest := func() github.WorkflowJobEvent {
f, err := os.Open("testdata/org_webhook_workflow_job_payload.json")
if err != nil {
t.Fatalf("could not open the fixture: %s", err)
}
defer f.Close()
var e github.WorkflowJobEvent
if err := json.NewDecoder(f).Decode(&e); err != nil {
t.Fatalf("invalid json: %s", err)
}
return e
}
t.Run("Successful", func(t *testing.T) {
e := setupTest()
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
Name: "test-name",
},
},
}
rd := &actionsv1alpha1.RunnerDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.RunnerDeploymentSpec{
Template: actionsv1alpha1.RunnerTemplate{
Spec: actionsv1alpha1.RunnerSpec{
RunnerConfig: actionsv1alpha1.RunnerConfig{
Organization: "MYORG",
Labels: []string{"label1"},
},
},
},
},
}
initObjs := []runtime.Object{hra, rd}
testServerWithInitObjs(t,
"workflow_job",
&e,
200,
"scaled test-name by 1",
initObjs,
)
})
t.Run("WrongLabels", func(t *testing.T) {
e := setupTest()
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
Name: "test-name",
},
},
}
rd := &actionsv1alpha1.RunnerDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.RunnerDeploymentSpec{
Template: actionsv1alpha1.RunnerTemplate{
Spec: actionsv1alpha1.RunnerSpec{
RunnerConfig: actionsv1alpha1.RunnerConfig{
Organization: "MYORG",
Labels: []string{"bad-label"},
},
},
},
},
}
initObjs := []runtime.Object{hra, rd}
testServerWithInitObjs(t,
"workflow_job",
&e,
200,
"no horizontalrunnerautoscaler to scale for this github event",
initObjs,
)
})
// This test verifies that the old way of matching labels doesn't work anymore
t.Run("OldLabels", func(t *testing.T) {
e := setupTest()
hra := &actionsv1alpha1.HorizontalRunnerAutoscaler{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.HorizontalRunnerAutoscalerSpec{
ScaleTargetRef: actionsv1alpha1.ScaleTargetRef{
Name: "test-name",
},
},
}
rd := &actionsv1alpha1.RunnerDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-name",
},
Spec: actionsv1alpha1.RunnerDeploymentSpec{
Template: actionsv1alpha1.RunnerTemplate{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"label1": "label1",
},
},
Spec: actionsv1alpha1.RunnerSpec{
RunnerConfig: actionsv1alpha1.RunnerConfig{
Organization: "MYORG",
Labels: []string{"bad-label"},
},
},
},
},
}
initObjs := []runtime.Object{hra, rd}
testServerWithInitObjs(t,
"workflow_job",
&e,
200,
"no horizontalrunnerautoscaler to scale for this github event",
initObjs,
)
})
}
 func TestGetRequest(t *testing.T) {
 	hra := HorizontalRunnerAutoscalerGitHubWebhook{}
 	request, _ := http.NewRequest(http.MethodGet, "/", nil)
@@ -177,13 +316,11 @@ func installTestLogger(webhook *HorizontalRunnerAutoscalerGitHubWebhook) *bytes.
 	return logs
 }
-func testServer(t *testing.T, eventType string, event interface{}, wantCode int, wantBody string) {
+func testServerWithInitObjs(t *testing.T, eventType string, event interface{}, wantCode int, wantBody string, initObjs []runtime.Object) {
 	t.Helper()
 	hraWebhook := &HorizontalRunnerAutoscalerGitHubWebhook{}
-	var initObjs []runtime.Object
 	client := fake.NewFakeClientWithScheme(sc, initObjs...)
 	logs := installTestLogger(hraWebhook)
@@ -227,6 +364,11 @@ func testServer(t *testing.T, eventType string, event interface{}, wantCode int,
 	}
 }
+func testServer(t *testing.T, eventType string, event interface{}, wantCode int, wantBody string) {
+	var initObjs []runtime.Object
+	testServerWithInitObjs(t, eventType, event, wantCode, wantBody, initObjs)
+}
 func sendWebhook(server *httptest.Server, eventType string, event interface{}) (*http.Response, error) {
 	jsonBuf := &bytes.Buffer{}
 	enc := json.NewEncoder(jsonBuf)

View File

@@ -50,7 +50,7 @@ type RunnerPodReconciler struct {
 }
 const (
-	// This names requires at leaset one slash to work.
+	// This names requires at least one slash to work.
 	// See https://github.com/google/knative-gcp/issues/378
 	runnerPodFinalizerName = "actions.summerwind.dev/runner-pod"

View File

@@ -0,0 +1,151 @@
{
"action": "queued",
"workflow_job": {
"id": 1234567890,
"run_id": 1234567890,
"run_url": "https://api.github.com/repos/MYORG/MYREPO/actions/runs/1234567890",
"node_id": "CR_kwDOGCados7e1x2g",
"head_sha": "1234567890123456789012345678901234567890",
"url": "https://api.github.com/repos/MYORG/MYREPO/actions/jobs/1234567890",
"html_url": "https://github.com/MYORG/MYREPO/runs/1234567890",
"status": "queued",
"conclusion": null,
"started_at": "2021-09-28T23:45:29Z",
"completed_at": null,
"name": "build",
"steps": [],
"check_run_url": "https://api.github.com/repos/MYORG/MYREPO/check-runs/1234567890",
"labels": [
"label1"
]
},
"repository": {
"id": 1234567890,
"node_id": "ABCDEFGHIJKLMNOPQRSTUVWXYZ=",
"name": "MYREPO",
"full_name": "MYORG/MYREPO",
"private": true,
"owner": {
"login": "MYORG",
"id": 1234567890,
"node_id": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"avatar_url": "https://avatars.githubusercontent.com/u/1234567890?v=4",
"gravatar_id": "",
"url": "https://api.github.com/users/MYORG",
"html_url": "https://github.com/MYORG",
"followers_url": "https://api.github.com/users/MYORG/followers",
"following_url": "https://api.github.com/users/MYORG/following{/other_user}",
"gists_url": "https://api.github.com/users/MYORG/gists{/gist_id}",
"starred_url": "https://api.github.com/users/MYORG/starred{/owner}{/repo}",
"subscriptions_url": "https://api.github.com/users/MYORG/subscriptions",
"organizations_url": "https://api.github.com/users/MYORG/orgs",
"repos_url": "https://api.github.com/users/MYORG/repos",
"events_url": "https://api.github.com/users/MYORG/events{/privacy}",
"received_events_url": "https://api.github.com/users/MYORG/received_events",
"type": "Organization",
"site_admin": false
},
"html_url": "https://github.com/MYORG/MYREPO",
"description": "MYREPO",
"fork": false,
"url": "https://api.github.com/repos/MYORG/MYREPO",
"forks_url": "https://api.github.com/repos/MYORG/MYREPO/forks",
"keys_url": "https://api.github.com/repos/MYORG/MYREPO/keys{/key_id}",
"collaborators_url": "https://api.github.com/repos/MYORG/MYREPO/collaborators{/collaborator}",
"teams_url": "https://api.github.com/repos/MYORG/MYREPO/teams",
"hooks_url": "https://api.github.com/repos/MYORG/MYREPO/hooks",
"issue_events_url": "https://api.github.com/repos/MYORG/MYREPO/issues/events{/number}",
"events_url": "https://api.github.com/repos/MYORG/MYREPO/events",
"assignees_url": "https://api.github.com/repos/MYORG/MYREPO/assignees{/user}",
"branches_url": "https://api.github.com/repos/MYORG/MYREPO/branches{/branch}",
"tags_url": "https://api.github.com/repos/MYORG/MYREPO/tags",
"blobs_url": "https://api.github.com/repos/MYORG/MYREPO/git/blobs{/sha}",
"git_tags_url": "https://api.github.com/repos/MYORG/MYREPO/git/tags{/sha}",
"git_refs_url": "https://api.github.com/repos/MYORG/MYREPO/git/refs{/sha}",
"trees_url": "https://api.github.com/repos/MYORG/MYREPO/git/trees{/sha}",
"statuses_url": "https://api.github.com/repos/MYORG/MYREPO/statuses/{sha}",
"languages_url": "https://api.github.com/repos/MYORG/MYREPO/languages",
"stargazers_url": "https://api.github.com/repos/MYORG/MYREPO/stargazers",
"contributors_url": "https://api.github.com/repos/MYORG/MYREPO/contributors",
"subscribers_url": "https://api.github.com/repos/MYORG/MYREPO/subscribers",
"subscription_url": "https://api.github.com/repos/MYORG/MYREPO/subscription",
"commits_url": "https://api.github.com/repos/MYORG/MYREPO/commits{/sha}",
"git_commits_url": "https://api.github.com/repos/MYORG/MYREPO/git/commits{/sha}",
"comments_url": "https://api.github.com/repos/MYORG/MYREPO/comments{/number}",
"issue_comment_url": "https://api.github.com/repos/MYORG/MYREPO/issues/comments{/number}",
"contents_url": "https://api.github.com/repos/MYORG/MYREPO/contents/{+path}",
"compare_url": "https://api.github.com/repos/MYORG/MYREPO/compare/{base}...{head}",
"merges_url": "https://api.github.com/repos/MYORG/MYREPO/merges",
"archive_url": "https://api.github.com/repos/MYORG/MYREPO/{archive_format}{/ref}",
"downloads_url": "https://api.github.com/repos/MYORG/MYREPO/downloads",
"issues_url": "https://api.github.com/repos/MYORG/MYREPO/issues{/number}",
"pulls_url": "https://api.github.com/repos/MYORG/MYREPO/pulls{/number}",
"milestones_url": "https://api.github.com/repos/MYORG/MYREPO/milestones{/number}",
"notifications_url": "https://api.github.com/repos/MYORG/MYREPO/notifications{?since,all,participating}",
"labels_url": "https://api.github.com/repos/MYORG/MYREPO/labels{/name}",
"releases_url": "https://api.github.com/repos/MYORG/MYREPO/releases{/id}",
"deployments_url": "https://api.github.com/repos/MYORG/MYREPO/deployments",
"created_at": "2021-09-10T18:55:38Z",
"updated_at": "2021-09-10T18:55:41Z",
"pushed_at": "2021-09-28T23:25:26Z",
"git_url": "git://github.com/MYORG/MYREPO.git",
"ssh_url": "git@github.com:MYORG/MYREPO.git",
"clone_url": "https://github.com/MYORG/MYREPO.git",
"svn_url": "https://github.com/MYORG/MYREPO",
"homepage": null,
"size": 121,
"stargazers_count": 0,
"watchers_count": 0,
"language": null,
"has_issues": true,
"has_projects": true,
"has_downloads": true,
"has_wiki": true,
"has_pages": false,
"forks_count": 0,
"mirror_url": null,
"archived": false,
"disabled": false,
"open_issues_count": 1,
"license": null,
"allow_forking": false,
"forks": 0,
"open_issues": 1,
"watchers": 0,
"default_branch": "master"
},
"organization": {
"login": "MYORG",
"id": 1234567890,
"node_id": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"url": "https://api.github.com/orgs/MYORG",
"repos_url": "https://api.github.com/orgs/MYORG/repos",
"events_url": "https://api.github.com/orgs/MYORG/events",
"hooks_url": "https://api.github.com/orgs/MYORG/hooks",
"issues_url": "https://api.github.com/orgs/MYORG/issues",
"members_url": "https://api.github.com/orgs/MYORG/members{/member}",
"public_members_url": "https://api.github.com/orgs/MYORG/public_members{/member}",
"avatar_url": "https://avatars.githubusercontent.com/u/1234567890?v=4",
"description": ""
},
"sender": {
"login": "MYNAME",
"id": 1234567890,
"node_id": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"avatar_url": "https://avatars.githubusercontent.com/u/1234567890?v=4",
"gravatar_id": "",
"url": "https://api.github.com/users/MYNAME",
"html_url": "https://github.com/MYNAME",
"followers_url": "https://api.github.com/users/MYNAME/followers",
"following_url": "https://api.github.com/users/MYNAME/following{/other_user}",
"gists_url": "https://api.github.com/users/MYNAME/gists{/gist_id}",
"starred_url": "https://api.github.com/users/MYNAME/starred{/owner}{/repo}",
"subscriptions_url": "https://api.github.com/users/MYNAME/subscriptions",
"organizations_url": "https://api.github.com/users/MYNAME/orgs",
"repos_url": "https://api.github.com/users/MYNAME/repos",
"events_url": "https://api.github.com/users/MYNAME/events{/privacy}",
"received_events_url": "https://api.github.com/users/MYNAME/received_events",
"type": "User",
"site_admin": false
}
}

View File

@@ -0,0 +1,136 @@
package main
import (
"context"
"errors"
"flag"
"fmt"
"net/http"
"os"
"os/signal"
"sync"
"syscall"
"github.com/actions-runner-controller/actions-runner-controller/github"
"github.com/actions-runner-controller/actions-runner-controller/pkg/githubwebhookdeliveryforwarder"
"github.com/kelseyhightower/envconfig"
)
func main() {
var (
metricsAddr string
target string
repo string
)
var c github.Config
if err := envconfig.Process("github", &c); err != nil {
fmt.Fprintln(os.Stderr, "Error: Environment variable read failed.")
}
flag.StringVar(&metricsAddr, "metrics-addr", ":8000", "The address the metric endpoint binds to.")
flag.StringVar(&repo, "repo", "", "The owner/name of the repository that has the target hook. If specified, the forwarder will use the first hook configured on the repository as the source.")
flag.StringVar(&target, "target", "", "The URL of the forwarding target that receives all the forwarded webhooks.")
flag.StringVar(&c.Token, "github-token", c.Token, "The personal access token of GitHub.")
flag.Int64Var(&c.AppID, "github-app-id", c.AppID, "The application ID of GitHub App.")
flag.Int64Var(&c.AppInstallationID, "github-app-installation-id", c.AppInstallationID, "The installation ID of GitHub App.")
flag.StringVar(&c.AppPrivateKey, "github-app-private-key", c.AppPrivateKey, "The path of a private key file to authenticate as a GitHub App")
flag.Parse()
ghClient, err := c.NewClient()
if err != nil {
fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err)
os.Exit(1)
}
var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
fwd := githubwebhookdeliveryforwarder.New(ghClient, target)
fwd.Repo = repo
mux := http.NewServeMux()
mux.HandleFunc("/readyz", fwd.HandleReadyz)
srv := http.Server{
Addr: metricsAddr,
Handler: mux,
}
wg.Add(1)
go func() {
defer cancel()
defer wg.Done()
if err := fwd.Run(ctx); err != nil {
fmt.Fprintf(os.Stderr, "problem running forwarder: %v\n", err)
}
}()
wg.Add(1)
go func() {
defer cancel()
defer wg.Done()
go func() {
<-ctx.Done()
srv.Shutdown(context.Background())
}()
if err := srv.ListenAndServe(); err != nil {
if !errors.Is(err, http.ErrServerClosed) {
fmt.Fprintf(os.Stderr, "problem running http server: %v\n", err)
}
}
}()
go func() {
<-SetupSignalHandler().Done()
cancel()
}()
wg.Wait()
}
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
var onlyOneSignalHandler = make(chan struct{})
var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}
// SetupSignalHandler registers for SIGTERM and SIGINT. A stop channel is returned
// which is closed on one of these signals. If a second signal is caught, the program
// is terminated with exit code 1.
func SetupSignalHandler() context.Context {
close(onlyOneSignalHandler) // panics when called twice
ctx, cancel := context.WithCancel(context.Background())
c := make(chan os.Signal, 2)
signal.Notify(c, shutdownSignals...)
go func() {
<-c
cancel()
<-c
os.Exit(1) // second signal. Exit directly.
}()
return ctx
}

View File

@@ -0,0 +1,202 @@
package githubwebhookdeliveryforwarder
import (
"bytes"
"context"
"fmt"
"net/http"
"os"
"sort"
"strings"
"time"
"github.com/actions-runner-controller/actions-runner-controller/github"
gogithub "github.com/google/go-github/v37/github"
)
type server struct {
target string
Repo string
client *github.Client
}
func New(client *github.Client, target string) *server {
var srv server
srv.target = target
srv.client = client
return &srv
}
func (s *server) Run(ctx context.Context) error {
segments := strings.Split(s.Repo, "/")
if len(segments) != 2 {
return fmt.Errorf("repository must be in a form of OWNER/REPO: got %q", s.Repo)
}
owner, repo := segments[0], segments[1]
hooks, _, err := s.client.Repositories.ListHooks(ctx, owner, repo, nil)
if err != nil {
s.Errorf("Failed listing hooks: %v", err)
return err
}
var hook *gogithub.Hook
for i := range hooks {
hook = hooks[i]
break
}
cur := &cursor{}
cur.deliveredAt = time.Now()
for {
var (
err error
payloads [][]byte
)
payloads, cur, err = s.getUnprocessedDeliveries(ctx, owner, repo, hook.GetID(), *cur)
if err != nil {
s.Errorf("failed getting unprocessed deliveries: %v", err)
}
for _, p := range payloads {
if _, err := http.Post(s.target, "application/json", bytes.NewReader(p)); err != nil {
s.Errorf("failed forwarding delivery: %v", err)
}
}
time.Sleep(10 * time.Second)
}
}
type cursor struct {
deliveredAt time.Time
id int64
}
func (s *server) getUnprocessedDeliveries(ctx context.Context, owner, repo string, hookID int64, pos cursor) ([][]byte, *cursor, error) {
var (
opts gogithub.ListCursorOptions
)
opts.PerPage = 2
var deliveries []*gogithub.HookDelivery
OUTER:
for {
ds, resp, err := s.client.Repositories.ListHookDeliveries(ctx, owner, repo, hookID, &opts)
if err != nil {
return nil, nil, err
}
opts.Cursor = resp.Cursor
for _, d := range ds {
d, _, err := s.client.Repositories.GetHookDelivery(ctx, owner, repo, hookID, d.GetID())
if err != nil {
return nil, nil, err
}
payload, err := d.ParseRequestPayload()
if err != nil {
return nil, nil, err
}
id := d.GetID()
deliveredAt := d.GetDeliveredAt()
if !pos.deliveredAt.IsZero() && deliveredAt.Before(pos.deliveredAt) {
s.Logf("%s is before %s so skipping all the remaining deliveries", deliveredAt, pos.deliveredAt)
break OUTER
}
if pos.id != 0 && id <= pos.id {
break OUTER
}
s.Logf("Received %T at %s: %v", payload, deliveredAt, payload)
if deliveredAt.After(pos.deliveredAt) {
pos.deliveredAt = deliveredAt.Time
}
if id > pos.id {
pos.id = id
}
}
if opts.Cursor == "" {
break
}
time.Sleep(1 * time.Second)
}
sort.Slice(deliveries, func(a, b int) bool {
return deliveries[b].GetDeliveredAt().After(deliveries[a].GetDeliveredAt().Time)
})
var payloads [][]byte
for _, d := range deliveries {
payloads = append(payloads, *d.Request.RawPayload)
}
return payloads, &pos, nil
}
func (s *server) HandleReadyz(w http.ResponseWriter, r *http.Request) {
var (
ok bool
err error
)
defer func() {
if !ok {
w.WriteHeader(http.StatusInternalServerError)
if err != nil {
msg := err.Error()
if _, err := w.Write([]byte(msg)); err != nil {
s.Errorf("failed writing http error response: %v", err)
}
}
}
}()
defer func() {
if r.Body != nil {
r.Body.Close()
}
}()
// respond ok to GET / e.g. for health check
if r.Method == http.MethodGet {
fmt.Fprintln(w, "webhook server is running")
return
}
w.WriteHeader(http.StatusOK)
if _, err := w.Write([]byte("ok")); err != nil {
s.Errorf("failed writing http response: %v", err)
}
}
func (s *server) Logf(format string, args ...interface{}) {
fmt.Fprintf(os.Stdout, format+"\n", args...)
}
func (s *server) Errorf(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format+"\n", args...)
}