feat: RunnerSet backed by StatefulSet (#629)

* feat: RunnerSet backed by StatefulSet Unlike a runner deployment, a runner set can manage a set of stateful runners by combining a statefulset and an admission webhook that mutates statefulset-managed pods with required envvars and registration tokens. Resolves #613 Ref #612 * Upgrade controller-runtime to 0.9.0 * Bump Go to 1.16.x following controller-runtime 0.9.0 * Upgrade kubebuilder to 2.3.2 for updated etcd and apiserver following local setup * Fix startup failure due to missing LeaderElectionID * Fix the issue that any pods become unable to start once actions-runner-controller got failed after the mutating webhook has been registered * Allow force-updating statefulset * Fix runner container missing work and certs-client volume mounts and DOCKER_HOST and DOCKER_TLS_VERIFY envvars when dockerdWithinRunner=false * Fix runnerset-controller not applying statefulset.spec.template.spec changes when there were no changes in runnerset spec * Enable running acceptance tests against arbitrary kind cluster * RunnerSet supports non-ephemeral runners only today * fix: docker-build from root Makefile on intel mac * fix: arch check fixes for mac and ARM * ci: aligning test data format and patching checks * fix: removing namespace in test data * chore: adding more ignores * chore: removing leading space in shebang * Re-add metrics to org hra testdata * Bump cert-manager to v1.1.1 and fix deploy.sh Co-authored-by: toast-gear <15716903+toast-gear@users.noreply.github.com> Co-authored-by: Callum James Tait <callum.tait@photobox.com>
2025-12-10 19:50:30 +00:00 · 2021-06-22 17:10:09 +09:00
parent af0ca03752
commit 9e4dbf497c
54 changed files with 28303 additions and 10624 deletions
--- a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@@ -1,26 +1,13 @@

 ---
-apiVersion: apiextensions.k8s.io/v1beta1
+apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
  annotations:
-    controller-gen.kubebuilder.io/version: v0.3.0
+    controller-gen.kubebuilder.io/version: v0.6.0
  creationTimestamp: null
  name: horizontalrunnerautoscalers.actions.summerwind.dev
 spec:
-  additionalPrinterColumns:
-  - JSONPath: .spec.minReplicas
-    name: Min
-    type: number
-  - JSONPath: .spec.maxReplicas
-    name: Max
-    type: number
-  - JSONPath: .status.desiredReplicas
-    name: Desired
-    type: number
-  - JSONPath: .status.scheduledOverridesSummary
-    name: Schedule
-    type: string
  group: actions.summerwind.dev
  names:
    kind: HorizontalRunnerAutoscaler
@@ -28,258 +15,274 @@ spec:
    plural: horizontalrunnerautoscalers
    singular: horizontalrunnerautoscaler
  scope: Namespaced
-  subresources:
-    status: {}
-  validation:
-    openAPIV3Schema:
-      description: HorizontalRunnerAutoscaler is the Schema for the horizontalrunnerautoscaler
-        API
-      properties:
-        apiVersion:
-          description: 'APIVersion defines the versioned schema of this representation
-            of an object. Servers should convert recognized schemas to the latest
-            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
-          type: string
-        kind:
-          description: 'Kind is a string value representing the REST resource this
-            object represents. Servers may infer this from the endpoint the client
-            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
-          type: string
-        metadata:
-          type: object
-        spec:
-          description: HorizontalRunnerAutoscalerSpec defines the desired state of
-            HorizontalRunnerAutoscaler
-          properties:
-            capacityReservations:
-              items:
-                description: CapacityReservation specifies the number of replicas
-                  temporarily added to the scale target until ExpirationTime.
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .spec.minReplicas
+      name: Min
+      type: number
+    - jsonPath: .spec.maxReplicas
+      name: Max
+      type: number
+    - jsonPath: .status.desiredReplicas
+      name: Desired
+      type: number
+    - jsonPath: .status.scheduledOverridesSummary
+      name: Schedule
+      type: string
+    name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: HorizontalRunnerAutoscaler is the Schema for the horizontalrunnerautoscaler
+          API
+        properties:
+          apiVersion:
+            description: 'APIVersion defines the versioned schema of this representation
+              of an object. Servers should convert recognized schemas to the latest
+              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+            type: string
+          kind:
+            description: 'Kind is a string value representing the REST resource this
+              object represents. Servers may infer this from the endpoint the client
+              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: HorizontalRunnerAutoscalerSpec defines the desired state
+              of HorizontalRunnerAutoscaler
+            properties:
+              capacityReservations:
+                items:
+                  description: CapacityReservation specifies the number of replicas
+                    temporarily added to the scale target until ExpirationTime.
+                  properties:
+                    expirationTime:
+                      format: date-time
+                      type: string
+                    name:
+                      type: string
+                    replicas:
+                      type: integer
+                  type: object
+                type: array
+              maxReplicas:
+                description: MinReplicas is the maximum number of replicas the deployment
+                  is allowed to scale
+                type: integer
+              metrics:
+                description: Metrics is the collection of various metric targets to
+                  calculate desired number of runners
+                items:
+                  properties:
+                    repositoryNames:
+                      description: RepositoryNames is the list of repository names
+                        to be used for calculating the metric. For example, a repository
+                        name is the REPO part of `github.com/USER/REPO`.
+                      items:
+                        type: string
+                      type: array
+                    scaleDownAdjustment:
+                      description: ScaleDownAdjustment is the number of runners removed
+                        on scale-down. You can only specify either ScaleDownFactor
+                        or ScaleDownAdjustment.
+                      type: integer
+                    scaleDownFactor:
+                      description: ScaleDownFactor is the multiplicative factor applied
+                        to the current number of runners used to determine how many
+                        pods should be removed.
+                      type: string
+                    scaleDownThreshold:
+                      description: ScaleDownThreshold is the percentage of busy runners
+                        less than which will trigger the hpa to scale the runners
+                        down.
+                      type: string
+                    scaleUpAdjustment:
+                      description: ScaleUpAdjustment is the number of runners added
+                        on scale-up. You can only specify either ScaleUpFactor or
+                        ScaleUpAdjustment.
+                      type: integer
+                    scaleUpFactor:
+                      description: ScaleUpFactor is the multiplicative factor applied
+                        to the current number of runners used to determine how many
+                        pods should be added.
+                      type: string
+                    scaleUpThreshold:
+                      description: ScaleUpThreshold is the percentage of busy runners
+                        greater than which will trigger the hpa to scale runners up.
+                      type: string
+                    type:
+                      description: Type is the type of metric to be used for autoscaling.
+                        The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
+                      type: string
+                  type: object
+                type: array
+              minReplicas:
+                description: MinReplicas is the minimum number of replicas the deployment
+                  is allowed to scale
+                type: integer
+              scaleDownDelaySecondsAfterScaleOut:
+                description: ScaleDownDelaySecondsAfterScaleUp is the approximate
+                  delay for a scale down followed by a scale up Used to prevent flapping
+                  (down->up->down->... loop)
+                type: integer
+              scaleTargetRef:
+                description: ScaleTargetRef sis the reference to scaled resource like
+                  RunnerDeployment
                properties:
-                  expirationTime:
-                    format: date-time
-                    type: string
                  name:
                    type: string
-                  replicas:
-                    type: integer
                type: object
-              type: array
-            maxReplicas:
-              description: MinReplicas is the maximum number of replicas the deployment
-                is allowed to scale
-              type: integer
-            metrics:
-              description: Metrics is the collection of various metric targets to
-                calculate desired number of runners
-              items:
-                properties:
-                  repositoryNames:
-                    description: RepositoryNames is the list of repository names to
-                      be used for calculating the metric. For example, a repository
-                      name is the REPO part of `github.com/USER/REPO`.
-                    items:
+              scaleUpTriggers:
+                description: "ScaleUpTriggers is an experimental feature to increase
+                  the desired replicas by 1 on each webhook requested received by
+                  the webhookBasedAutoscaler. \n This feature requires you to also
+                  enable and deploy the webhookBasedAutoscaler onto your cluster.
+                  \n Note that the added runners remain until the next sync period
+                  at least, and they may or may not be used by GitHub Actions depending
+                  on the timing. They are intended to be used to gain \"resource slack\"
+                  immediately after you receive a webhook from GitHub, so that you
+                  can loosely expect MinReplicas runners to be always available."
+                items:
+                  properties:
+                    amount:
+                      type: integer
+                    duration:
                      type: string
-                    type: array
-                  scaleDownAdjustment:
-                    description: ScaleDownAdjustment is the number of runners removed
-                      on scale-down. You can only specify either ScaleDownFactor or
-                      ScaleDownAdjustment.
-                    type: integer
-                  scaleDownFactor:
-                    description: ScaleDownFactor is the multiplicative factor applied
-                      to the current number of runners used to determine how many
-                      pods should be removed.
-                    type: string
-                  scaleDownThreshold:
-                    description: ScaleDownThreshold is the percentage of busy runners
-                      less than which will trigger the hpa to scale the runners down.
-                    type: string
-                  scaleUpAdjustment:
-                    description: ScaleUpAdjustment is the number of runners added
-                      on scale-up. You can only specify either ScaleUpFactor or ScaleUpAdjustment.
-                    type: integer
-                  scaleUpFactor:
-                    description: ScaleUpFactor is the multiplicative factor applied
-                      to the current number of runners used to determine how many
-                      pods should be added.
-                    type: string
-                  scaleUpThreshold:
-                    description: ScaleUpThreshold is the percentage of busy runners
-                      greater than which will trigger the hpa to scale runners up.
-                    type: string
-                  type:
-                    description: Type is the type of metric to be used for autoscaling.
-                      The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
-                    type: string
-                type: object
-              type: array
-            minReplicas:
-              description: MinReplicas is the minimum number of replicas the deployment
-                is allowed to scale
-              type: integer
-            scaleDownDelaySecondsAfterScaleOut:
-              description: ScaleDownDelaySecondsAfterScaleUp is the approximate delay
-                for a scale down followed by a scale up Used to prevent flapping (down->up->down->...
-                loop)
-              type: integer
-            scaleTargetRef:
-              description: ScaleTargetRef sis the reference to scaled resource like
-                RunnerDeployment
-              properties:
-                name:
-                  type: string
-              type: object
-            scaleUpTriggers:
-              description: "ScaleUpTriggers is an experimental feature to increase
-                the desired replicas by 1 on each webhook requested received by the
-                webhookBasedAutoscaler. \n This feature requires you to also enable
-                and deploy the webhookBasedAutoscaler onto your cluster. \n Note that
-                the added runners remain until the next sync period at least, and
-                they may or may not be used by GitHub Actions depending on the timing.
-                They are intended to be used to gain \"resource slack\" immediately
-                after you receive a webhook from GitHub, so that you can loosely expect
-                MinReplicas runners to be always available."
-              items:
-                properties:
-                  amount:
-                    type: integer
-                  duration:
-                    type: string
-                  githubEvent:
-                    properties:
-                      checkRun:
-                        description: https://docs.github.com/en/actions/reference/events-that-trigger-workflows#check_run
-                        properties:
-                          names:
-                            description: Names is a list of GitHub Actions glob patterns.
-                              Any check_run event whose name matches one of patterns
-                              in the list can trigger autoscaling. Note that check_run
-                              name seem to equal to the job name you've defined in
-                              your actions workflow yaml file. So it is very likely
-                              that you can utilize this to trigger depending on the
-                              job.
-                            items:
+                    githubEvent:
+                      properties:
+                        checkRun:
+                          description: https://docs.github.com/en/actions/reference/events-that-trigger-workflows#check_run
+                          properties:
+                            names:
+                              description: Names is a list of GitHub Actions glob
+                                patterns. Any check_run event whose name matches one
+                                of patterns in the list can trigger autoscaling. Note
+                                that check_run name seem to equal to the job name
+                                you've defined in your actions workflow yaml file.
+                                So it is very likely that you can utilize this to
+                                trigger depending on the job.
+                              items:
+                                type: string
+                              type: array
+                            status:
                              type: string
-                            type: array
-                          status:
-                            type: string
-                          types:
-                            items:
-                              type: string
-                            type: array
-                        type: object
-                      pullRequest:
-                        description: https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
-                        properties:
-                          branches:
-                            items:
-                              type: string
-                            type: array
-                          types:
-                            items:
-                              type: string
-                            type: array
-                        type: object
-                      push:
-                        description: PushSpec is the condition for triggering scale-up
-                          on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
-                        type: object
-                    type: object
-                type: object
-              type: array
-            scheduledOverrides:
-              description: ScheduledOverrides is the list of ScheduledOverride. It
-                can be used to override a few fields of HorizontalRunnerAutoscalerSpec
-                on schedule. The earlier a scheduled override is, the higher it is
-                prioritized.
-              items:
-                description: ScheduledOverride can be used to override a few fields
-                  of HorizontalRunnerAutoscalerSpec on schedule. A schedule can optionally
-                  be recurring, so that the correspoding override happens every day,
-                  week, month, or year.
-                properties:
-                  endTime:
-                    description: EndTime is the time at which the first override ends.
-                    format: date-time
-                    type: string
-                  minReplicas:
-                    description: MinReplicas is the number of runners while overriding.
-                      If omitted, it doesn't override minReplicas.
-                    minimum: 0
-                    nullable: true
-                    type: integer
-                  recurrenceRule:
-                    properties:
-                      frequency:
-                        description: Frequency is the name of a predefined interval
-                          of each recurrence. The valid values are "Daily", "Weekly",
-                          "Monthly", and "Yearly". If empty, the corresponding override
-                          happens only once.
-                        enum:
-                        - Daily
-                        - Weekly
-                        - Monthly
-                        - Yearly
-                        type: string
-                      untilTime:
-                        description: UntilTime is the time of the final recurrence.
-                          If empty, the schedule recurs forever.
-                        format: date-time
-                        type: string
-                    type: object
-                  startTime:
-                    description: StartTime is the time at which the first override
-                      starts.
-                    format: date-time
-                    type: string
-                required:
-                - endTime
-                - startTime
-                type: object
-              type: array
-          type: object
-        status:
-          properties:
-            cacheEntries:
-              items:
-                properties:
-                  expirationTime:
-                    format: date-time
-                    type: string
-                  key:
-                    type: string
-                  value:
-                    type: integer
-                type: object
-              type: array
-            desiredReplicas:
-              description: DesiredReplicas is the total number of desired, non-terminated
-                and latest pods to be set for the primary RunnerSet This doesn't include
-                outdated pods while upgrading the deployment and replacing the runnerset.
-              type: integer
-            lastSuccessfulScaleOutTime:
-              format: date-time
-              nullable: true
-              type: string
-            observedGeneration:
-              description: ObservedGeneration is the most recent generation observed
-                for the target. It corresponds to e.g. RunnerDeployment's generation,
-                which is updated on mutation by the API Server.
-              format: int64
-              type: integer
-            scheduledOverridesSummary:
-              description: ScheduledOverridesSummary is the summary of active and
-                upcoming scheduled overrides to be shown in e.g. a column of a `kubectl
-                get hra` output for observability.
-              type: string
-          type: object
-      type: object
-  version: v1alpha1
-  versions:
-  - name: v1alpha1
+                            types:
+                              items:
+                                type: string
+                              type: array
+                          type: object
+                        pullRequest:
+                          description: https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
+                          properties:
+                            branches:
+                              items:
+                                type: string
+                              type: array
+                            types:
+                              items:
+                                type: string
+                              type: array
+                          type: object
+                        push:
+                          description: PushSpec is the condition for triggering scale-up
+                            on push event Also see https://docs.github.com/en/actions/reference/events-that-trigger-workflows#push
+                          type: object
+                      type: object
+                  type: object
+                type: array
+              scheduledOverrides:
+                description: ScheduledOverrides is the list of ScheduledOverride.
+                  It can be used to override a few fields of HorizontalRunnerAutoscalerSpec
+                  on schedule. The earlier a scheduled override is, the higher it
+                  is prioritized.
+                items:
+                  description: ScheduledOverride can be used to override a few fields
+                    of HorizontalRunnerAutoscalerSpec on schedule. A schedule can
+                    optionally be recurring, so that the correspoding override happens
+                    every day, week, month, or year.
+                  properties:
+                    endTime:
+                      description: EndTime is the time at which the first override
+                        ends.
+                      format: date-time
+                      type: string
+                    minReplicas:
+                      description: MinReplicas is the number of runners while overriding.
+                        If omitted, it doesn't override minReplicas.
+                      minimum: 0
+                      nullable: true
+                      type: integer
+                    recurrenceRule:
+                      properties:
+                        frequency:
+                          description: Frequency is the name of a predefined interval
+                            of each recurrence. The valid values are "Daily", "Weekly",
+                            "Monthly", and "Yearly". If empty, the corresponding override
+                            happens only once.
+                          enum:
+                          - Daily
+                          - Weekly
+                          - Monthly
+                          - Yearly
+                          type: string
+                        untilTime:
+                          description: UntilTime is the time of the final recurrence.
+                            If empty, the schedule recurs forever.
+                          format: date-time
+                          type: string
+                      type: object
+                    startTime:
+                      description: StartTime is the time at which the first override
+                        starts.
+                      format: date-time
+                      type: string
+                  required:
+                  - endTime
+                  - startTime
+                  type: object
+                type: array
+            type: object
+          status:
+            properties:
+              cacheEntries:
+                items:
+                  properties:
+                    expirationTime:
+                      format: date-time
+                      type: string
+                    key:
+                      type: string
+                    value:
+                      type: integer
+                  type: object
+                type: array
+              desiredReplicas:
+                description: DesiredReplicas is the total number of desired, non-terminated
+                  and latest pods to be set for the primary RunnerSet This doesn't
+                  include outdated pods while upgrading the deployment and replacing
+                  the runnerset.
+                type: integer
+              lastSuccessfulScaleOutTime:
+                format: date-time
+                nullable: true
+                type: string
+              observedGeneration:
+                description: ObservedGeneration is the most recent generation observed
+                  for the target. It corresponds to e.g. RunnerDeployment's generation,
+                  which is updated on mutation by the API Server.
+                format: int64
+                type: integer
+              scheduledOverridesSummary:
+                description: ScheduledOverridesSummary is the summary of active and
+                  upcoming scheduled overrides to be shown in e.g. a column of a `kubectl
+                  get hra` output for observability.
+                type: string
+            type: object
+        type: object
    served: true
    storage: true
+    subresources:
+      status: {}
 status:
  acceptedNames:
    kind: ""