Prevent static runners from terminating due to unregister timeout

The unregister timeout of 1 minute (no matter how long it is) can negatively impact availability of static runner constantly running workflow jobs, and ephemeral runner that runs a long-running job.
We deal with that by completely removing the unregistaration timeout, so that regarldess of the type of runner(static or ephemeral) it waits forever until it successfully to get unregistered before being terminated.
This commit is contained in:
Yusuke Kuoka
2022-03-13 07:22:04 +00:00
parent a1c6d1d11a
commit c4b24f8366
8 changed files with 29 additions and 38 deletions

View File

@@ -45,7 +45,6 @@ type RunnerPodReconciler struct {
RegistrationRecheckInterval time.Duration
RegistrationRecheckJitter time.Duration
UnregistrationTimeout time.Duration
UnregistrationRetryDelay time.Duration
}
@@ -104,7 +103,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// In a standard scenario, the upstream controller, like runnerset-controller, ensures this runner to be gracefully stopped before the deletion timestamp is set.
// But for the case that the user manually deleted it for whatever reason,
// we have to ensure it to gracefully stop now.
updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
if res != nil {
return *res, err
}
@@ -172,7 +171,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
//
// In a standard scenario, ARC starts the unregistration process before marking the pod for deletion at all,
// so that it isn't subject to terminationGracePeriod and can safely take hours to finish it's work.
_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
if res != nil {
return *res, err
}
@@ -190,15 +189,6 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, nil
}
func (r *RunnerPodReconciler) unregistrationTimeout() time.Duration {
unregistrationTimeout := DefaultUnregistrationTimeout
if r.UnregistrationTimeout > 0 {
unregistrationTimeout = r.UnregistrationTimeout
}
return unregistrationTimeout
}
func (r *RunnerPodReconciler) unregistrationRetryDelay() time.Duration {
retryDelay := DefaultUnregistrationRetryDelay