From 1eb15f28a79df30b0786d0e9e57d80c3749b5d81 Mon Sep 17 00:00:00 2001 From: Tingluo Huang Date: Mon, 13 Oct 2025 16:21:32 -0400 Subject: [PATCH] Report job has infra failure to run-service (#4073) --- src/Runner.Common/RunServer.cs | 4 +++- src/Runner.Listener/JobDispatcher.cs | 2 +- src/Runner.Worker/ActionManager.cs | 10 +++++----- src/Runner.Worker/ExecutionContext.cs | 8 ++++++-- src/Runner.Worker/GlobalContext.cs | 1 + src/Runner.Worker/JobRunner.cs | 2 +- src/Sdk/RSWebApi/Contracts/CompleteJobRequest.cs | 3 +++ src/Sdk/RSWebApi/Contracts/IssueExtensions.cs | 1 + src/Sdk/RSWebApi/RunServiceHttpClient.cs | 2 ++ 9 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/Runner.Common/RunServer.cs b/src/Runner.Common/RunServer.cs index b57d2754b..cfdb98cb9 100644 --- a/src/Runner.Common/RunServer.cs +++ b/src/Runner.Common/RunServer.cs @@ -30,6 +30,7 @@ namespace GitHub.Runner.Common string environmentUrl, IList telemetry, string billingOwnerId, + string infrastructureFailureCategory, CancellationToken token); Task RenewJobAsync(Guid planId, Guid jobId, CancellationToken token); @@ -80,11 +81,12 @@ namespace GitHub.Runner.Common string environmentUrl, IList telemetry, string billingOwnerId, + string infrastructureFailureCategory, CancellationToken cancellationToken) { CheckConnection(); return RetryRequest( - async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, cancellationToken), cancellationToken, + async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, infrastructureFailureCategory, cancellationToken), cancellationToken, shouldRetry: ex => ex is not VssUnauthorizedException && // HTTP status 401 ex is not TaskOrchestrationJobNotFoundException); // HTTP status 404 diff --git a/src/Runner.Listener/JobDispatcher.cs b/src/Runner.Listener/JobDispatcher.cs index f98204b42..bbc09593c 100644 --- a/src/Runner.Listener/JobDispatcher.cs +++ b/src/Runner.Listener/JobDispatcher.cs @@ -1211,7 +1211,7 @@ namespace GitHub.Runner.Listener jobAnnotations.Add(annotation.Value); } - await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, TaskResult.Failed, outputs: null, stepResults: null, jobAnnotations: jobAnnotations, environmentUrl: null, telemetry: null, billingOwnerId: message.BillingOwnerId, CancellationToken.None); + await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, TaskResult.Failed, outputs: null, stepResults: null, jobAnnotations: jobAnnotations, environmentUrl: null, telemetry: null, billingOwnerId: message.BillingOwnerId, infrastructureFailureCategory: null, CancellationToken.None); } catch (Exception ex) { diff --git a/src/Runner.Worker/ActionManager.cs b/src/Runner.Worker/ActionManager.cs index 9a21aeb4c..c2af24bbc 100644 --- a/src/Runner.Worker/ActionManager.cs +++ b/src/Runner.Worker/ActionManager.cs @@ -111,7 +111,7 @@ namespace GitHub.Runner.Worker { // Log the error and fail the PrepareActionsAsync Initialization. Trace.Error($"Caught exception from PrepareActionsAsync Initialization: {ex}"); - executionContext.InfrastructureError(ex.Message); + executionContext.InfrastructureError(ex.Message, category: "resolve_action"); executionContext.Result = TaskResult.Failed; throw; } @@ -119,7 +119,7 @@ namespace GitHub.Runner.Worker { // Log the error and fail the PrepareActionsAsync Initialization. Trace.Error($"Caught exception from PrepareActionsAsync Initialization: {ex}"); - executionContext.InfrastructureError(ex.Message); + executionContext.InfrastructureError(ex.Message, category: "invalid_action_download"); executionContext.Result = TaskResult.Failed; throw; } @@ -777,15 +777,15 @@ namespace GitHub.Runner.Worker IOUtil.DeleteDirectory(destDirectory, executionContext.CancellationToken); Directory.CreateDirectory(destDirectory); - if (downloadInfo.PackageDetails != null) + if (downloadInfo.PackageDetails != null) { executionContext.Output($"##[group]Download immutable action package '{downloadInfo.NameWithOwner}@{downloadInfo.Ref}'"); executionContext.Output($"Version: {downloadInfo.PackageDetails.Version}"); executionContext.Output($"Digest: {downloadInfo.PackageDetails.ManifestDigest}"); executionContext.Output($"Source commit SHA: {downloadInfo.ResolvedSha}"); executionContext.Output("##[endgroup]"); - } - else + } + else { executionContext.Output($"Download action repository '{downloadInfo.NameWithOwner}@{downloadInfo.Ref}' (SHA:{downloadInfo.ResolvedSha})"); } diff --git a/src/Runner.Worker/ExecutionContext.cs b/src/Runner.Worker/ExecutionContext.cs index e64c6e24a..3410d1831 100644 --- a/src/Runner.Worker/ExecutionContext.cs +++ b/src/Runner.Worker/ExecutionContext.cs @@ -522,6 +522,10 @@ namespace GitHub.Runner.Worker if (annotation != null) { stepResult.Annotations.Add(annotation.Value); + if (annotation.Value.IsInfrastructureIssue && string.IsNullOrEmpty(Global.InfrastructureFailureCategory)) + { + Global.InfrastructureFailureCategory = issue.Category; + } } }); @@ -1335,9 +1339,9 @@ namespace GitHub.Runner.Worker } // Do not add a format string overload. See comment on ExecutionContext.Write(). - public static void InfrastructureError(this IExecutionContext context, string message) + public static void InfrastructureError(this IExecutionContext context, string message, string category = null) { - var issue = new Issue() { Type = IssueType.Error, Message = message, IsInfrastructureIssue = true }; + var issue = new Issue() { Type = IssueType.Error, Message = message, IsInfrastructureIssue = true, Category = category }; context.AddIssue(issue, ExecutionContextLogOptions.Default); } diff --git a/src/Runner.Worker/GlobalContext.cs b/src/Runner.Worker/GlobalContext.cs index 32b58384f..5a4c7babd 100644 --- a/src/Runner.Worker/GlobalContext.cs +++ b/src/Runner.Worker/GlobalContext.cs @@ -27,6 +27,7 @@ namespace GitHub.Runner.Worker public StepsContext StepsContext { get; set; } public Variables Variables { get; set; } public bool WriteDebug { get; set; } + public string InfrastructureFailureCategory { get; set; } public JObject ContainerHookState { get; set; } } } diff --git a/src/Runner.Worker/JobRunner.cs b/src/Runner.Worker/JobRunner.cs index 1390af13b..72ee5a403 100644 --- a/src/Runner.Worker/JobRunner.cs +++ b/src/Runner.Worker/JobRunner.cs @@ -321,7 +321,7 @@ namespace GitHub.Runner.Worker { try { - await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, default); + await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, infrastructureFailureCategory: jobContext.Global.InfrastructureFailureCategory, default); return result; } catch (VssUnauthorizedException ex) diff --git a/src/Sdk/RSWebApi/Contracts/CompleteJobRequest.cs b/src/Sdk/RSWebApi/Contracts/CompleteJobRequest.cs index a9ba71a57..c3049dc10 100644 --- a/src/Sdk/RSWebApi/Contracts/CompleteJobRequest.cs +++ b/src/Sdk/RSWebApi/Contracts/CompleteJobRequest.cs @@ -35,5 +35,8 @@ namespace GitHub.Actions.RunService.WebApi [DataMember(Name = "billingOwnerId", EmitDefaultValue = false)] public string BillingOwnerId { get; set; } + + [DataMember(Name = "infrastructureFailureCategory", EmitDefaultValue = false)] + public string InfrastructureFailureCategory { get; set; } } } diff --git a/src/Sdk/RSWebApi/Contracts/IssueExtensions.cs b/src/Sdk/RSWebApi/Contracts/IssueExtensions.cs index 113eaa7e0..34aaff813 100644 --- a/src/Sdk/RSWebApi/Contracts/IssueExtensions.cs +++ b/src/Sdk/RSWebApi/Contracts/IssueExtensions.cs @@ -42,6 +42,7 @@ namespace Sdk.RSWebApi.Contracts StartColumn = columnNumber, EndColumn = endColumnNumber, StepNumber = stepNumber, + IsInfrastructureIssue = issue.IsInfrastructureIssue ?? false }; } diff --git a/src/Sdk/RSWebApi/RunServiceHttpClient.cs b/src/Sdk/RSWebApi/RunServiceHttpClient.cs index bb1407706..83afdafff 100644 --- a/src/Sdk/RSWebApi/RunServiceHttpClient.cs +++ b/src/Sdk/RSWebApi/RunServiceHttpClient.cs @@ -131,6 +131,7 @@ namespace GitHub.Actions.RunService.WebApi string environmentUrl, IList telemetry, string billingOwnerId, + string infrastructureFailureCategory, CancellationToken cancellationToken = default) { HttpMethod httpMethod = new HttpMethod("POST"); @@ -145,6 +146,7 @@ namespace GitHub.Actions.RunService.WebApi EnvironmentUrl = environmentUrl, Telemetry = telemetry, BillingOwnerId = billingOwnerId, + InfrastructureFailureCategory = infrastructureFailureCategory }; requestUri = new Uri(requestUri, "completejob");