From 3486c54ccbb8181b3bb46e1a3f22c85f79aa5414 Mon Sep 17 00:00:00 2001 From: eric sciple Date: Tue, 4 Feb 2025 10:07:42 -0600 Subject: [PATCH] Do not retry CompleteJobAsync upon job-not-found (#3696) --- src/Runner.Common/Constants.cs | 1 + src/Runner.Common/RunServer.cs | 33 ++++++++++++++++++++++++++++++ src/Runner.Common/RunnerService.cs | 5 +++-- src/Runner.Worker/JobRunner.cs | 23 ++++++++++++++++++++- 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/Runner.Common/Constants.cs b/src/Runner.Common/Constants.cs index 2c20d1b16..53628d51a 100644 --- a/src/Runner.Common/Constants.cs +++ b/src/Runner.Common/Constants.cs @@ -160,6 +160,7 @@ namespace GitHub.Runner.Common { public static readonly string DiskSpaceWarning = "runner.diskspace.warning"; public static readonly string LogTemplateErrorsAsDebugMessages = "DistributedTask.LogTemplateErrorsAsDebugMessages"; + public static readonly string SkipRetryCompleteJobUponKnownErrors = "actions_skip_retry_complete_job_upon_known_errors"; public static readonly string UseContainerPathForTemplate = "DistributedTask.UseContainerPathForTemplate"; public static readonly string AllowRunnerContainerHooks = "DistributedTask.AllowRunnerContainerHooks"; } diff --git a/src/Runner.Common/RunServer.cs b/src/Runner.Common/RunServer.cs index a343bf0e3..9c3b1bdfb 100644 --- a/src/Runner.Common/RunServer.cs +++ b/src/Runner.Common/RunServer.cs @@ -32,6 +32,18 @@ namespace GitHub.Runner.Common string billingOwnerId, CancellationToken token); + Task CompleteJob2Async( + Guid planId, + Guid jobId, + TaskResult result, + Dictionary outputs, + IList stepResults, + IList jobAnnotations, + string environmentUrl, + IList telemetry, + string billingOwnerId, + CancellationToken token); + Task RenewJobAsync(Guid planId, Guid jobId, CancellationToken token); } @@ -70,6 +82,7 @@ namespace GitHub.Runner.Common ex is not TaskOrchestrationJobUnprocessableException); // HTTP status 422 } + // Legacy will be deleted when SkipRetryCompleteJobUponKnownErrors is cleaned up public Task CompleteJobAsync( Guid planId, Guid jobId, @@ -87,6 +100,26 @@ namespace GitHub.Runner.Common async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, cancellationToken), cancellationToken); } + public Task CompleteJob2Async( + Guid planId, + Guid jobId, + TaskResult result, + Dictionary outputs, + IList stepResults, + IList jobAnnotations, + string environmentUrl, + IList telemetry, + string billingOwnerId, + CancellationToken cancellationToken) + { + CheckConnection(); + return RetryRequest( + async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, cancellationToken), cancellationToken, + shouldRetry: ex => + ex is not VssUnauthorizedException && // HTTP status 401 + ex is not TaskOrchestrationJobNotFoundException); // HTTP status 404 + } + public Task RenewJobAsync(Guid planId, Guid jobId, CancellationToken cancellationToken) { CheckConnection(); diff --git a/src/Runner.Common/RunnerService.cs b/src/Runner.Common/RunnerService.cs index a18ff9674..f266d8a02 100644 --- a/src/Runner.Common/RunnerService.cs +++ b/src/Runner.Common/RunnerService.cs @@ -70,7 +70,8 @@ namespace GitHub.Runner.Common protected async Task RetryRequest(Func func, CancellationToken cancellationToken, - int maxRetryAttemptsCount = 5 + int maxRetryAttemptsCount = 5, + Func shouldRetry = null ) { async Task wrappedFunc() @@ -78,7 +79,7 @@ namespace GitHub.Runner.Common await func(); return Unit.Value; } - await RetryRequest(wrappedFunc, cancellationToken, maxRetryAttemptsCount); + await RetryRequest(wrappedFunc, cancellationToken, maxRetryAttemptsCount, shouldRetry); } protected async Task RetryRequest(Func> func, diff --git a/src/Runner.Worker/JobRunner.cs b/src/Runner.Worker/JobRunner.cs index 7c96ef84d..916c691fd 100644 --- a/src/Runner.Worker/JobRunner.cs +++ b/src/Runner.Worker/JobRunner.cs @@ -318,9 +318,30 @@ namespace GitHub.Runner.Worker { try { - await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, default); + if (jobContext.Global.Variables.GetBoolean(Constants.Runner.Features.SkipRetryCompleteJobUponKnownErrors) ?? false) + { + await runServer.CompleteJob2Async(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, default); + } + else + { + await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, default); + } return result; } + catch (VssUnauthorizedException ex) when (jobContext.Global.Variables.GetBoolean(Constants.Runner.Features.SkipRetryCompleteJobUponKnownErrors) ?? false) + { + Trace.Error($"Catch exception while attempting to complete job {message.JobId}, job request {message.RequestId}."); + Trace.Error(ex); + exceptions.Add(ex); + break; + } + catch (TaskOrchestrationJobNotFoundException ex) when (jobContext.Global.Variables.GetBoolean(Constants.Runner.Features.SkipRetryCompleteJobUponKnownErrors) ?? false) + { + Trace.Error($"Catch exception while attempting to complete job {message.JobId}, job request {message.RequestId}."); + Trace.Error(ex); + exceptions.Add(ex); + break; + } catch (Exception ex) { Trace.Error($"Catch exception while attempting to complete job {message.JobId}, job request {message.RequestId}.");