From 830575dafef34f536eb7ff7796f591ed90d8421b Mon Sep 17 00:00:00 2001
From: Yang Cao
Date: Fri, 11 Dec 2020 11:07:43 -0500
Subject: [PATCH] Count actions resolve failures as infra failures (#851)

During a job run we may fail to resolve action download info. This stack is
fully controlled by GitHub Actions, so such a failure should be counted as an
infrastructure failure rather than a user failure.

A cancelled run is excluded: its exception propagates unchanged instead of
being retried and rewrapped as an infrastructure failure.
---
 src/Runner.Worker/ActionManager.cs    | 21 ++++++++++++++-------
 src/Runner.Worker/ExecutionContext.cs |  6 ++++++
 src/Runner.Worker/JobExtension.cs     |  8 ++++++++
 src/Sdk/DTWebApi/WebApi/Exceptions.cs | 19 +++++++++++++++++++
 src/Sdk/DTWebApi/WebApi/Issue.cs      |  8 ++++++++
 5 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/src/Runner.Worker/ActionManager.cs b/src/Runner.Worker/ActionManager.cs
index 5b137baed..6dfa72c05 100644
--- a/src/Runner.Worker/ActionManager.cs
+++ b/src/Runner.Worker/ActionManager.cs
@@ -594,15 +594,22 @@ namespace GitHub.Runner.Worker
                     actionDownloadInfos = await jobServer.ResolveActionDownloadInfoAsync(executionContext.Global.Plan.ScopeIdentifier, executionContext.Global.Plan.PlanType, executionContext.Global.Plan.PlanId, new WebApi.ActionReferenceList { Actions = actionReferences }, executionContext.CancellationToken);
                     break;
                 }
-                catch (Exception ex) when (attempt < 3)
+                catch (Exception ex) when (!executionContext.CancellationToken.IsCancellationRequested) // Do not retry or rewrap once the run is cancelled; let that exception propagate as-is.
                 {
-                    executionContext.Output($"Failed to resolve action download info. Error: {ex.Message}");
-                    executionContext.Debug(ex.ToString());
-                    if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF")))
+                    if (attempt < 3)
                     {
-                        var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30));
-                        executionContext.Output($"Retrying in {backoff.TotalSeconds} seconds");
-                        await Task.Delay(backoff);
+                        executionContext.Output($"Failed to resolve action download info. Error: {ex.Message}");
+                        executionContext.Debug(ex.ToString());
+                        if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF")))
+                        {
+                            var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30));
+                            executionContext.Output($"Retrying in {backoff.TotalSeconds} seconds");
+                            await Task.Delay(backoff);
+                        }
+                    }
+                    else
+                    {
+                        throw new WebApi.FailedToResolveActionDownloadInfoException("Failed to resolve action download info.", ex); // Final attempt failed: typed so JobExtension can record an infrastructure error.
                     }
                 }
             }
diff --git a/src/Runner.Worker/ExecutionContext.cs b/src/Runner.Worker/ExecutionContext.cs
index 449b43537..545340d36 100644
--- a/src/Runner.Worker/ExecutionContext.cs
+++ b/src/Runner.Worker/ExecutionContext.cs
@@ -918,6 +918,12 @@ namespace GitHub.Runner.Worker
             context.AddIssue(new Issue() { Type = IssueType.Error, Message = message });
         }
 
+        // Do not add a format string overload. See comment on ExecutionContext.Write().
+        public static void InfrastructureError(this IExecutionContext context, string message)
+        {
+            context.AddIssue(new Issue() { Type = IssueType.Error, Message = message, IsInfrastructureIssue = true });
+        }
+
         // Do not add a format string overload. See comment on ExecutionContext.Write().
         public static void Warning(this IExecutionContext context, string message)
         {
diff --git a/src/Runner.Worker/JobExtension.cs b/src/Runner.Worker/JobExtension.cs
index 1f51c57c8..d4f5d21b4 100644
--- a/src/Runner.Worker/JobExtension.cs
+++ b/src/Runner.Worker/JobExtension.cs
@@ -335,6 +335,14 @@ namespace GitHub.Runner.Worker
                     context.Result = TaskResult.Canceled;
                     throw;
                 }
+                catch (FailedToResolveActionDownloadInfoException ex)
+                {
+                    // Log the error and fail the JobExtension Initialization.
+                    Trace.Error($"Caught exception from JobExtenion Initialization: {ex}");
+                    context.InfrastructureError(ex.Message);
+                    context.Result = TaskResult.Failed;
+                    throw;
+                }
                 catch (Exception ex)
                 {
                     // Log the error and fail the JobExtension Initialization.
diff --git a/src/Sdk/DTWebApi/WebApi/Exceptions.cs b/src/Sdk/DTWebApi/WebApi/Exceptions.cs
index c3615da7a..9e2435f5c 100644
--- a/src/Sdk/DTWebApi/WebApi/Exceptions.cs
+++ b/src/Sdk/DTWebApi/WebApi/Exceptions.cs
@@ -2458,4 +2458,23 @@ namespace GitHub.DistributedTask.WebApi
         {
         }
     }
+
+    [Serializable]
+    public sealed class FailedToResolveActionDownloadInfoException : DistributedTaskException // Thrown when resolving action download info fails; JobExtension records it as an infrastructure error.
+    {
+        public FailedToResolveActionDownloadInfoException(String message)
+            : base(message)
+        {
+        }
+
+        public FailedToResolveActionDownloadInfoException(String message, Exception innerException) // innerException carries the underlying resolve failure.
+            : base(message, innerException)
+        {
+        }
+
+        private FailedToResolveActionDownloadInfoException(SerializationInfo info, StreamingContext context) // Serialization constructor: forwards info/context to the base class.
+            : base(info, context)
+        {
+        }
+    }
 }
diff --git a/src/Sdk/DTWebApi/WebApi/Issue.cs b/src/Sdk/DTWebApi/WebApi/Issue.cs
index 4875ca5a1..af4d2f850 100644
--- a/src/Sdk/DTWebApi/WebApi/Issue.cs
+++ b/src/Sdk/DTWebApi/WebApi/Issue.cs
@@ -17,6 +17,7 @@ namespace GitHub.DistributedTask.WebApi
             this.Type = issueToBeCloned.Type;
             this.Category = issueToBeCloned.Category;
             this.Message = issueToBeCloned.Message;
+            this.IsInfrastructureIssue = issueToBeCloned.IsInfrastructureIssue; // Carry the infrastructure flag over to the clone.
 
             if (issueToBeCloned.m_data != null)
             {
@@ -48,6 +49,13 @@ namespace GitHub.DistributedTask.WebApi
             set;
         }
 
+        [DataMember(Order = 4)]
+        public bool? IsInfrastructureIssue // Set to true by IExecutionContext.InfrastructureError(); stays null when never assigned.
+        {
+            get;
+            set;
+        }
+
         public IDictionary Data
         {
             get