From 8e52a6d2cf2d5eb44373034cf8f870c0209d9234 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Sun, 12 Feb 2023 01:55:12 +0100 Subject: [PATCH] EphemeralRunner: On cleanup, if pod is pending, delete from service (#2255) Co-authored-by: Tingluo Huang --- .../ephemeralrunner_controller.go | 63 ++++++++++++++++++- .../ephemeralrunner_controller_test.go | 12 ++-- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/controllers/actions.github.com/ephemeralrunner_controller.go b/controllers/actions.github.com/ephemeralrunner_controller.go index f2aca7b1..e6bfc9cb 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller.go +++ b/controllers/actions.github.com/ephemeralrunner_controller.go @@ -43,7 +43,8 @@ const ( // It represents the name of the container running the self-hosted runner image. EphemeralRunnerContainerName = "runner" - ephemeralRunnerFinalizerName = "ephemeralrunner.actions.github.com/finalizer" + ephemeralRunnerFinalizerName = "ephemeralrunner.actions.github.com/finalizer" + ephemeralRunnerActionsFinalizerName = "ephemeralrunner.actions.github.com/runner-registration-finalizer" ) // EphemeralRunnerReconciler reconciles a EphemeralRunner object @@ -80,6 +81,24 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } + if controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerActionsFinalizerName) { + switch ephemeralRunner.Status.Phase { + case corev1.PodSucceeded: + // deleted by the runner set, we can just remove finalizer without API calls + err := patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { + controllerutil.RemoveFinalizer(obj, ephemeralRunnerActionsFinalizerName) + }) + if err != nil { + log.Error(err, "Failed to update ephemeral runner without runner registration finalizer") + return ctrl.Result{}, err + } + log.Info("Successfully removed runner registration finalizer") + return ctrl.Result{}, nil + default: + return r.cleanupRunnerFromService(ctx, ephemeralRunner, log) + } + } + log.Info("Finalizing ephemeral runner") done, err := r.cleanupResources(ctx, ephemeralRunner, log) if err != nil { @@ -114,6 +133,19 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } + if !controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerActionsFinalizerName) { + log.Info("Adding runner registration finalizer") + err := patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { + controllerutil.AddFinalizer(obj, ephemeralRunnerActionsFinalizerName) + }) + if err != nil { + log.Error(err, "Failed to update with runner registration finalizer set") + return ctrl.Result{}, err + } + + log.Info("Successfully added runner registration finalizer") + } + if !controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerFinalizerName) { log.Info("Adding finalizer") if err := patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { @@ -236,6 +268,33 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } } +func (r *EphemeralRunnerReconciler) cleanupRunnerFromService(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) { + actionsError := &actions.ActionsError{} + err := r.deleteRunnerFromService(ctx, ephemeralRunner, log) + if err != nil { + if errors.As(err, &actionsError) && + actionsError.StatusCode == http.StatusBadRequest && + strings.Contains(actionsError.ExceptionName, "JobStillRunningException") { + log.Info("Runner is still running the job. Re-queue in 30 seconds") + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + log.Error(err, "Failed clean up runner from the service") + return ctrl.Result{}, err + } + + log.Info("Successfully removed runner registration from service") + err = patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { + controllerutil.RemoveFinalizer(obj, ephemeralRunnerActionsFinalizerName) + }) + if err != nil { + return ctrl.Result{}, err + } + + log.Info("Successfully removed runner registration finalizer") + return ctrl.Result{}, nil +} + func (r *EphemeralRunnerReconciler) cleanupResources(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (deleted bool, err error) { log.Info("Cleaning up the runner pod") pod := new(corev1.Pod) @@ -614,7 +673,7 @@ func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context, log.Info("Removing runner from the service", "runnerId", ephemeralRunner.Status.RunnerId) err = client.RemoveRunner(ctx, int64(ephemeralRunner.Status.RunnerId)) if err != nil { - return fmt.Errorf("failed to remove runner from the service: %v", err) + return fmt.Errorf("failed to remove runner from the service: %w", err) } log.Info("Removed runner from the service", "runnerId", ephemeralRunner.Status.RunnerId) diff --git a/controllers/actions.github.com/ephemeralrunner_controller_test.go b/controllers/actions.github.com/ephemeralrunner_controller_test.go index f8166abe..ba5d9fb2 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunner_controller_test.go @@ -153,19 +153,21 @@ var _ = Describe("EphemeralRunner", func() { created := new(v1alpha1.EphemeralRunner) // Check if finalizer is added Eventually( - func() (string, error) { + func() ([]string, error) { err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, created) if err != nil { - return "", err + return nil, err } if len(created.Finalizers) == 0 { - return "", nil + return nil, nil } - return created.Finalizers[0], nil + + n := len(created.Finalizers) // avoid capacity mismatch + return created.Finalizers[:n:n], nil }, timeout, interval, - ).Should(BeEquivalentTo(ephemeralRunnerFinalizerName)) + ).Should(BeEquivalentTo([]string{ephemeralRunnerActionsFinalizerName, ephemeralRunnerFinalizerName})) Eventually( func() (bool, error) {