mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-11 03:57:01 +00:00
Enhance RunnerSet to optionally retain PVs accross restarts (#1340)
* Enhance RunnerSet to optionally retain PVs accross restarts This is our initial attempt to bring back the ability to retain PVs across runner pod restarts when using RunnerSet. The implementation is composed of two new controllers, `runnerpersistentvolumeclaim-controller` and `runnerpersistentvolume-controller`. It all starts from our existing `runnerset-controller`. The controller now tries to mark any PVCs created by StatefulSets created for the RunnerSet. Once the controller terminated statefulsets, their corresponding PVCs are clean up by `runnerpersistentvolumeclaim-controller`, then PVs are unbound from their corresponding PVCs by `runnerpersistentvolume-controller` so that they can be reused by future PVCs createf for future StatefulSets that shares the same same StorageClass. Ref #1286 * Update E2E test suite to cover runner, docker, and go caching with RunnerSet + PVs Ref #1286
This commit is contained in:
76
controllers/persistent_volume_claim_controller.go
Normal file
76
controllers/persistent_volume_claim_controller.go
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright 2022 The actions-runner-controller authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/tools/record"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// RunnerPersistentVolumeClaimReconciler reconciles a PersistentVolume object
|
||||
type RunnerPersistentVolumeClaimReconciler struct {
|
||||
client.Client
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
Name string
|
||||
}
|
||||
|
||||
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||
|
||||
func (r *RunnerPersistentVolumeClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
log := r.Log.WithValues("pvc", req.NamespacedName)
|
||||
|
||||
var pvc corev1.PersistentVolumeClaim
|
||||
if err := r.Get(ctx, req.NamespacedName, &pvc); err != nil {
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
log.Info("Reconciling runner pvc")
|
||||
|
||||
res, err := syncPVC(ctx, r.Client, log, req.Namespace, &pvc)
|
||||
|
||||
if res == nil {
|
||||
res = &ctrl.Result{}
|
||||
}
|
||||
|
||||
return *res, err
|
||||
}
|
||||
|
||||
func (r *RunnerPersistentVolumeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
name := "runnerpersistentvolumeclaim-controller"
|
||||
if r.Name != "" {
|
||||
name = r.Name
|
||||
}
|
||||
|
||||
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&corev1.PersistentVolumeClaim{}).
|
||||
Named(name).
|
||||
Complete(r)
|
||||
}
|
||||
72
controllers/persistent_volume_controller.go
Normal file
72
controllers/persistent_volume_controller.go
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright 2022 The actions-runner-controller authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/tools/record"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// RunnerPersistentVolumeReconciler reconciles a PersistentVolume object
|
||||
type RunnerPersistentVolumeReconciler struct {
|
||||
client.Client
|
||||
Log logr.Logger
|
||||
Recorder record.EventRecorder
|
||||
Scheme *runtime.Scheme
|
||||
Name string
|
||||
}
|
||||
|
||||
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||
|
||||
func (r *RunnerPersistentVolumeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
log := r.Log.WithValues("pv", req.NamespacedName)
|
||||
|
||||
var pv corev1.PersistentVolume
|
||||
if err := r.Get(ctx, req.NamespacedName, &pv); err != nil {
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
res, err := syncPV(ctx, r.Client, log, req.Namespace, &pv)
|
||||
if res == nil {
|
||||
res = &ctrl.Result{}
|
||||
}
|
||||
|
||||
return *res, err
|
||||
}
|
||||
|
||||
func (r *RunnerPersistentVolumeReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
name := "runnerpersistentvolume-controller"
|
||||
if r.Name != "" {
|
||||
name = r.Name
|
||||
}
|
||||
|
||||
r.Recorder = mgr.GetEventRecorderFor(name)
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&corev1.PersistentVolume{}).
|
||||
Named(name).
|
||||
Complete(r)
|
||||
}
|
||||
@@ -58,6 +58,7 @@ type RunnerSetReconciler struct {
|
||||
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnersets/status,verbs=get;update;patch
|
||||
// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
|
||||
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
|
||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
|
||||
|
||||
@@ -129,6 +130,12 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
owners = append(owners, &ss)
|
||||
}
|
||||
|
||||
if res, err := syncVolumes(ctx, r.Client, log, req.Namespace, runnerSet, statefulsets); err != nil {
|
||||
return ctrl.Result{}, err
|
||||
} else if res != nil {
|
||||
return *res, nil
|
||||
}
|
||||
|
||||
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, newDesiredReplicas, func() client.Object { return create.DeepCopy() }, ephemeral, owners)
|
||||
if err != nil || res == nil {
|
||||
return ctrl.Result{}, err
|
||||
|
||||
175
controllers/sync_volumes.go
Normal file
175
controllers/sync_volumes.go
Normal file
@@ -0,0 +1,175 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||
"github.com/go-logr/logr"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
)
|
||||
|
||||
const (
|
||||
labelKeyCleanup = "pending-cleanup"
|
||||
labelKeyRunnerStatefulSetName = "runner-statefulset-name"
|
||||
)
|
||||
|
||||
func syncVolumes(ctx context.Context, c client.Client, log logr.Logger, ns string, runnerSet *v1alpha1.RunnerSet, statefulsets []appsv1.StatefulSet) (*ctrl.Result, error) {
|
||||
log = log.WithValues("ns", ns)
|
||||
|
||||
for _, t := range runnerSet.Spec.StatefulSetSpec.VolumeClaimTemplates {
|
||||
for _, sts := range statefulsets {
|
||||
pvcName := fmt.Sprintf("%s-%s-0", t.Name, sts.Name)
|
||||
|
||||
var pvc corev1.PersistentVolumeClaim
|
||||
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: pvcName}, &pvc); err != nil {
|
||||
if !kerrors.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO move this to statefulset reconciler so that we spam this less,
|
||||
// by starting the loop only after the statefulset got deletionTimestamp set.
|
||||
// Perhaps you can just wrap this in a finalizer here.
|
||||
if pvc.Labels[labelKeyRunnerStatefulSetName] == "" {
|
||||
updated := pvc.DeepCopy()
|
||||
updated.Labels[labelKeyRunnerStatefulSetName] = sts.Name
|
||||
if err := c.Update(ctx, updated); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.V(1).Info("Added runner-statefulset-name label to PVC", "sts", sts.Name, "pvc", pvcName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PVs are not namespaced hence we don't need client.InNamespace(ns).
|
||||
// If we added that, c.List will silently return zero items.
|
||||
//
|
||||
// This `List` needs to be done in a dedicated reconciler that is registered to the manager via the `For` func.
|
||||
// Otherwise the List func might return outdated contents(I saw status.phase being Bound even after K8s updated it to Released, and it lasted minutes).
|
||||
//
|
||||
// cleanupLabels := map[string]string{
|
||||
// labelKeyCleanup: runnerSet.Name,
|
||||
// }
|
||||
// pvList := &corev1.PersistentVolumeList{}
|
||||
// if err := c.List(ctx, pvList, client.MatchingLabels(cleanupLabels)); err != nil {
|
||||
// log.Info("retrying pv listing", "ns", ns, "err", err)
|
||||
// return nil, err
|
||||
// }
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func syncPVC(ctx context.Context, c client.Client, log logr.Logger, ns string, pvc *corev1.PersistentVolumeClaim) (*ctrl.Result, error) {
|
||||
stsName := pvc.Labels[labelKeyRunnerStatefulSetName]
|
||||
if stsName == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var sts appsv1.StatefulSet
|
||||
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: stsName}, &sts); err != nil {
|
||||
if !kerrors.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
// We assume that the statefulset is shortly terminated, hence retry forever until it gets removed.
|
||||
retry := 10 * time.Second
|
||||
log.V(1).Info("Retrying sync until statefulset gets removed", "requeueAfter", retry)
|
||||
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||
}
|
||||
|
||||
log = log.WithValues("pvc", pvc.Name, "sts", stsName)
|
||||
|
||||
pvName := pvc.Spec.VolumeName
|
||||
|
||||
if pvName != "" {
|
||||
// If we deleted PVC before unsetting pv.spec.claimRef,
|
||||
// K8s seems to revive the claimRef :thinking:
|
||||
// So we need to mark PV for claimRef unset first, and delete PVC, and finally unset claimRef on PV.
|
||||
|
||||
var pv corev1.PersistentVolume
|
||||
if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: pvName}, &pv); err != nil {
|
||||
if !kerrors.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
pvCopy := pv.DeepCopy()
|
||||
if pvCopy.Labels == nil {
|
||||
pvCopy.Labels = map[string]string{}
|
||||
}
|
||||
pvCopy.Labels[labelKeyCleanup] = stsName
|
||||
|
||||
log.Info("Scheduling to unset PV's claimRef", "pv", pv.Name)
|
||||
|
||||
// Apparently K8s doesn't reconcile PV immediately after PVC deletion.
|
||||
// So we start a relatively busy loop of PV reconcilation slightly before the PVC deletion,
|
||||
// so that PV can be unbound as soon as possible after the PVC got deleted.
|
||||
if err := c.Update(ctx, pvCopy); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// At this point, the PV is still Bound
|
||||
|
||||
log.Info("Deleting unused pvc")
|
||||
|
||||
if err := c.Delete(ctx, pvc); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// At this point, the PV is still "Bound", but we are ready to unset pv.spec.claimRef in pv controller.
|
||||
// Once the pv controller unsets claimRef, the PV becomes "Released", hence available for reuse by another eligible PVC.
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func syncPV(ctx context.Context, c client.Client, log logr.Logger, ns string, pv *corev1.PersistentVolume) (*ctrl.Result, error) {
|
||||
log.V(2).Info("checking pv claimRef")
|
||||
|
||||
if pv.Spec.ClaimRef == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
log.V(2).Info("checking labels")
|
||||
|
||||
if pv.Labels[labelKeyCleanup] == "" {
|
||||
// We assume that the pvc is shortly terminated, hence retry forever until it gets removed.
|
||||
retry := 10 * time.Second
|
||||
log.V(1).Info("Retrying sync until pvc gets removed", "requeueAfter", retry)
|
||||
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||
}
|
||||
|
||||
log.V(2).Info("checking pv phase", "phase", pv.Status.Phase)
|
||||
|
||||
if pv.Status.Phase != corev1.VolumeReleased {
|
||||
// We assume that the pvc is shortly terminated, hence retry forever until it gets removed.
|
||||
retry := 10 * time.Second
|
||||
log.V(1).Info("Retrying sync until pvc gets released", "requeueAfter", retry)
|
||||
return &ctrl.Result{RequeueAfter: retry}, nil
|
||||
}
|
||||
|
||||
// At this point, the PV is still Released
|
||||
|
||||
pvCopy := pv.DeepCopy()
|
||||
delete(pvCopy.Labels, labelKeyCleanup)
|
||||
pvCopy.Spec.ClaimRef = nil
|
||||
log.Info("Unsetting PV's claimRef", "pv", pv.Name)
|
||||
if err := c.Update(ctx, pvCopy); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// At this point, the PV becomes Available, if it's reclaim policy is "Retain".
|
||||
// I have not yet tested it with "Delete" but perhaps it's deleted automatically after the update?
|
||||
// https://kubernetes.io/docs/concepts/storage/persistent-volumes/#retain
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
Reference in New Issue
Block a user