Count actions resolve failures as infra failures (#851)

During a job run we may fail to resolve action download info. This
stack is fully controlled by GitHub Actions, so such a failure should be
counted as an infrastructure failure rather than a user failure.
This commit is contained in:
Yang Cao
2020-12-11 11:07:43 -05:00
committed by TingluoHuang
parent 9a41ec3d6e
commit 830575dafe
5 changed files with 55 additions and 7 deletions

View File

@@ -594,15 +594,22 @@ namespace GitHub.Runner.Worker
actionDownloadInfos = await jobServer.ResolveActionDownloadInfoAsync(executionContext.Global.Plan.ScopeIdentifier, executionContext.Global.Plan.PlanType, executionContext.Global.Plan.PlanId, new WebApi.ActionReferenceList { Actions = actionReferences }, executionContext.CancellationToken);
break;
}
catch (Exception ex) when (attempt < 3)
catch (Exception ex)
{
executionContext.Output($"Failed to resolve action download info. Error: {ex.Message}");
executionContext.Debug(ex.ToString());
if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF")))
if (attempt < 3)
{
var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30));
executionContext.Output($"Retrying in {backoff.TotalSeconds} seconds");
await Task.Delay(backoff);
executionContext.Output($"Failed to resolve action download info. Error: {ex.Message}");
executionContext.Debug(ex.ToString());
if (String.IsNullOrEmpty(Environment.GetEnvironmentVariable("_GITHUB_ACTION_DOWNLOAD_NO_BACKOFF")))
{
var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(30));
executionContext.Output($"Retrying in {backoff.TotalSeconds} seconds");
await Task.Delay(backoff);
}
}
else
{
throw new WebApi.FailedToResolveActionDownloadInfoException("Failed to resolve action download info.", ex);
}
}
}

View File

@@ -918,6 +918,12 @@ namespace GitHub.Runner.Worker
context.AddIssue(new Issue() { Type = IssueType.Error, Message = message });
}
// Do not add a format string overload. See comment on ExecutionContext.Write().
// Adds an error-type issue carrying the given message to the context, with the
// issue's IsInfrastructureIssue flag set to true.
public static void InfrastructureError(this IExecutionContext context, string message)
{
    var infrastructureIssue = new Issue()
    {
        Type = IssueType.Error,
        Message = message,
        IsInfrastructureIssue = true,
    };

    context.AddIssue(infrastructureIssue);
}
// Do not add a format string overload. See comment on ExecutionContext.Write().
public static void Warning(this IExecutionContext context, string message)
{

View File

@@ -335,6 +335,14 @@ namespace GitHub.Runner.Worker
context.Result = TaskResult.Canceled;
throw;
}
catch (FailedToResolveActionDownloadInfoException ex)
{
// Log the error and fail the JobExtension Initialization.
Trace.Error($"Caught exception from JobExtenion Initialization: {ex}");
context.InfrastructureError(ex.Message);
context.Result = TaskResult.Failed;
throw;
}
catch (Exception ex)
{
// Log the error and fail the JobExtension Initialization.

View File

@@ -2458,4 +2458,23 @@ namespace GitHub.DistributedTask.WebApi
{
}
}
/// <summary>
/// Exception type representing a failure to resolve action download info.
/// </summary>
[Serializable]
public sealed class FailedToResolveActionDownloadInfoException : DistributedTaskException
{
    /// <summary>
    /// Creates the exception with a descriptive message.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    public FailedToResolveActionDownloadInfoException(String message)
        : base(message) { }

    /// <summary>
    /// Creates the exception with a descriptive message and the exception that caused it.
    /// </summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="innerException">The underlying exception being wrapped.</param>
    public FailedToResolveActionDownloadInfoException(String message, Exception innerException)
        : base(message, innerException) { }

    /// <summary>
    /// Serialization constructor; forwards the serialized state to the base class.
    /// </summary>
    private FailedToResolveActionDownloadInfoException(SerializationInfo info, StreamingContext context)
        : base(info, context) { }
}
}

View File

@@ -17,6 +17,7 @@ namespace GitHub.DistributedTask.WebApi
this.Type = issueToBeCloned.Type;
this.Category = issueToBeCloned.Category;
this.Message = issueToBeCloned.Message;
this.IsInfrastructureIssue = issueToBeCloned.IsInfrastructureIssue;
if (issueToBeCloned.m_data != null)
{
@@ -48,6 +49,13 @@ namespace GitHub.DistributedTask.WebApi
set;
}
/// <summary>
/// Nullable flag indicating whether this issue is an infrastructure issue.
/// Declared as a data member with serialization order 4.
/// </summary>
[DataMember(Order = 4)]
public bool? IsInfrastructureIssue { get; set; }
public IDictionary<String, String> Data
{
get