// Patch overview: adds a per-step retry count and a retry loop to the step runner.
//
// Files touched by this patch:
//   * src/Sdk/DTPipelines/Pipelines/ActionStep.cs - adds an Int32 `Retries` property marked
//     [DataMember(EmitDefaultValue = false)] and copies it from `actionToClone` in the
//     cloning constructor.
//   * src/Runner.Worker/StepsRunner.cs - adds a get-only `Int32 Retries` member to the step
//     contract declared next to ContinueOnError/DisplayName/Timeout, and wraps the per-step
//     execution in a `while (true)` loop driven by a local `attempt` counter that starts at 1.
//   * src/Runner.Worker/ActionRunner.cs - `Retries` returns `Action?.Retries ?? 0`.
//   * src/Runner.Worker/JobExtensionRunner.cs - `Retries` always returns 0.
//
// Retry loop, as written in the StepsRunner.cs hunk:
//   * Each attempt re-applies the timeout (when timeoutMinutes > 0), calls
//     EncodingUtil.SetEncoding, awaits step.RunAsync(), maps exceptions to a TaskResult,
//     merges CommandResult into Result, applies the continue-on-error fixup, and logs
//     "Step result: ...".
//   * After an attempt, the step is run again only when Result == TaskResult.Failed and
//     `attempt <= step.Retries`; `attempt` is incremented and Result/ResultCode are set to
//     null before looping. A step therefore runs at most 1 + Retries times.
//   * A timeout sets Result = Failed, so it meets the retry condition; the other
//     OperationCanceledException branch sets Result = Canceled, which does not.
//
// NOTE(review): the continue-on-error fixup runs before the retry check, so a failed step
//   whose continue-on-error evaluates to true has Result == Succeeded by the time the retry
//   condition is tested and is never retried - confirm this ordering is intended.
// NOTE(review): CommandResult is merged on every attempt but is not reset between attempts
//   (listed as a todo in the hunk); presumably a stale failed CommandResult would fail the
//   next attempt as well - verify against TaskResultUtil.MergeTaskResults.
// NOTE(review): the step cancellation token source is not replaced between attempts (also a
//   todo); presumably a retry after a timeout starts with an already-cancelled token -
//   verify how SetTimeout behaves when called again on the same execution context.
// NOTE(review): nothing in this patch validates `Retries`; a negative value behaves like 0
//   because `attempt <= step.Retries` is false on the first attempt.
//
// The hunk headers below encode line counts, so no lines are added inside any hunk here.
diff --git a/src/Runner.Worker/ActionRunner.cs b/src/Runner.Worker/ActionRunner.cs index 3ea522ac8..aba0823ec 100644 --- a/src/Runner.Worker/ActionRunner.cs +++ b/src/Runner.Worker/ActionRunner.cs @@ -64,6 +64,8 @@ namespace GitHub.Runner.Worker public Pipelines.ActionStep Action { get; set; } + public Int32 Retries => Action?.Retries ?? 0; + public TemplateToken Timeout => Action?.TimeoutInMinutes; public async Task RunAsync() diff --git a/src/Runner.Worker/JobExtensionRunner.cs b/src/Runner.Worker/JobExtensionRunner.cs index 48c2c2358..036224cf5 100644 --- a/src/Runner.Worker/JobExtensionRunner.cs +++ b/src/Runner.Worker/JobExtensionRunner.cs @@ -26,6 +26,7 @@ namespace GitHub.Runner.Worker public TemplateToken ContinueOnError => new BooleanToken(null, null, null, false); public string DisplayName { get; set; } public IExecutionContext ExecutionContext { get; set; } + public Int32 Retries => 0; public TemplateToken Timeout => new NumberToken(null, null, null, 0); public object Data => _data; diff --git a/src/Runner.Worker/StepsRunner.cs b/src/Runner.Worker/StepsRunner.cs index 58845206b..4abb17cfe 100644 --- a/src/Runner.Worker/StepsRunner.cs +++ b/src/Runner.Worker/StepsRunner.cs @@ -24,6 +24,7 @@ namespace GitHub.Runner.Worker TemplateToken ContinueOnError { get; } string DisplayName { get; set; } IExecutionContext ExecutionContext { get; set; } + Int32 Retries { get; } TemplateToken Timeout { get; } Task RunAsync(); } @@ -280,73 +281,96 @@ namespace GitHub.Runner.Worker step.ExecutionContext.Error("An error occurred when attempting to determine the step timeout."); step.ExecutionContext.Error(ex); } - if (timeoutMinutes > 0) - { - var timeout = TimeSpan.FromMinutes(timeoutMinutes); - step.ExecutionContext.SetTimeout(timeout); - } - await EncodingUtil.SetEncoding(HostContext, Trace, step.ExecutionContext.CancellationToken); - - try + int attempt = 1; + while (true) { - await step.RunAsync(); - } - catch (OperationCanceledException ex) - { - if 
(step.ExecutionContext.CancellationToken.IsCancellationRequested && - !jobCancellationToken.IsCancellationRequested) + if (timeoutMinutes > 0) { - Trace.Error($"Caught timeout exception from step: {ex.Message}"); - step.ExecutionContext.Error("The action has timed out."); - step.ExecutionContext.Result = TaskResult.Failed; + var timeout = TimeSpan.FromMinutes(timeoutMinutes); + step.ExecutionContext.SetTimeout(timeout); } - else - { - // Log the exception and cancel the step. - Trace.Error($"Caught cancellation exception from step: {ex}"); - step.ExecutionContext.Error(ex); - step.ExecutionContext.Result = TaskResult.Canceled; - } - } - catch (Exception ex) - { - // Log the error and fail the step. - Trace.Error($"Caught exception from step: {ex}"); - step.ExecutionContext.Error(ex); - step.ExecutionContext.Result = TaskResult.Failed; - } - // Merge execution context result with command result - if (step.ExecutionContext.CommandResult != null) - { - step.ExecutionContext.Result = TaskResultUtil.MergeTaskResults(step.ExecutionContext.Result, step.ExecutionContext.CommandResult.Value); - } + await EncodingUtil.SetEncoding(HostContext, Trace, step.ExecutionContext.CancellationToken); - // Fixup the step result if ContinueOnError. - if (step.ExecutionContext.Result == TaskResult.Failed) - { - var continueOnError = false; try { - continueOnError = templateEvaluator.EvaluateStepContinueOnError(step.ContinueOnError, step.ExecutionContext.ExpressionValues, step.ExecutionContext.ExpressionFunctions); + await step.RunAsync(); + } + catch (OperationCanceledException ex) + { + if (step.ExecutionContext.CancellationToken.IsCancellationRequested && + !jobCancellationToken.IsCancellationRequested) + { + Trace.Error($"Caught timeout exception from step: {ex.Message}"); + step.ExecutionContext.Error("The action has timed out."); + step.ExecutionContext.Result = TaskResult.Failed; + } + else + { + // Log the exception and cancel the step. 
+ Trace.Error($"Caught cancellation exception from step: {ex}"); + step.ExecutionContext.Error(ex); + step.ExecutionContext.Result = TaskResult.Canceled; + } } catch (Exception ex) { - Trace.Info("The step failed and an error occurred when attempting to determine whether to continue on error."); - Trace.Error(ex); - step.ExecutionContext.Error("The step failed and an error occurred when attempting to determine whether to continue on error."); + // Log the error and fail the step. + Trace.Error($"Caught exception from step: {ex}"); step.ExecutionContext.Error(ex); + step.ExecutionContext.Result = TaskResult.Failed; } - if (continueOnError) + // Merge execution context result with command result + if (step.ExecutionContext.CommandResult != null) { - step.ExecutionContext.Outcome = step.ExecutionContext.Result; - step.ExecutionContext.Result = TaskResult.Succeeded; - Trace.Info($"Updated step result (continue on error)"); + step.ExecutionContext.Result = TaskResultUtil.MergeTaskResults(step.ExecutionContext.Result, step.ExecutionContext.CommandResult.Value); } + + // Fixup the step result if ContinueOnError. 
+ if (step.ExecutionContext.Result == TaskResult.Failed) + { + var continueOnError = false; + try + { + continueOnError = templateEvaluator.EvaluateStepContinueOnError(step.ContinueOnError, step.ExecutionContext.ExpressionValues, step.ExecutionContext.ExpressionFunctions); + } + catch (Exception ex) + { + Trace.Info("The step failed and an error occurred when attempting to determine whether to continue on error."); + Trace.Error(ex); + step.ExecutionContext.Error("The step failed and an error occurred when attempting to determine whether to continue on error."); + step.ExecutionContext.Error(ex); + } + + if (continueOnError) + { + step.ExecutionContext.Outcome = step.ExecutionContext.Result; + step.ExecutionContext.Result = TaskResult.Succeeded; + Trace.Info($"Updated step result (continue on error)"); + } + } + Trace.Info($"Step result: {step.ExecutionContext.Result}"); + + if (step.ExecutionContext.Result == TaskResult.Failed && attempt <= step.Retries) + { + attempt++; + step.ExecutionContext.Result = null; + step.ExecutionContext.ResultCode = null; + // todo: replace the step cancellation token source + // todo: reset the step.ExecutionContext.CommandResult + // todo: create a new timeline record, e.g. "My display name (#2)" + // todo: clear outputs? What will we do on a job? probably clear outputs since merging from separate timeline attempts would otherwise be complex + // todo: consider intrastate - i guess it makes sense this doesn't get cleared + // todo: reconcile all of the above wrt composite steps + // todo: reconcile all of the above wrt pre/post + // todo: distinguish retryable vs non-retryable failures? e.g. if an exception bubbles from the handler + continue; + } + + break; } - Trace.Info($"Step result: {step.ExecutionContext.Result}"); // Complete the step context. 
step.ExecutionContext.Debug($"Finishing: {step.DisplayName}"); diff --git a/src/Sdk/DTPipelines/Pipelines/ActionStep.cs b/src/Sdk/DTPipelines/Pipelines/ActionStep.cs index f4ed5f041..cd8d2edc3 100644 --- a/src/Sdk/DTPipelines/Pipelines/ActionStep.cs +++ b/src/Sdk/DTPipelines/Pipelines/ActionStep.cs @@ -22,6 +22,7 @@ namespace GitHub.DistributedTask.Pipelines this.Reference = actionToClone.Reference?.Clone(); Environment = actionToClone.Environment?.Clone(); + Retries = actionToClone.Retries; Inputs = actionToClone.Inputs?.Clone(); ContextName = actionToClone?.ContextName; DisplayNameToken = actionToClone.DisplayNameToken?.Clone(); @@ -46,6 +47,9 @@ namespace GitHub.DistributedTask.Pipelines [DataMember(EmitDefaultValue = false)] public TemplateToken Environment { get; set; } + [DataMember(EmitDefaultValue = false)] + public Int32 Retries { get; set; } + [DataMember(EmitDefaultValue = false)] public TemplateToken Inputs { get; set; }