Compare commits

...

5 Commits

Author SHA1 Message Date
Thomas Boop
6d2be5f366 rename 2021-08-04 01:40:08 +00:00
Thomas Boop
2724797626 minor cleanup 2021-08-03 22:12:27 +00:00
Thomas Boop
fc57c796ce set context name for pre/post steps for composite actions 2021-08-03 21:49:05 +00:00
Thomas Boop
3851acd0cf fix continue on error (#1238) 2021-08-03 17:44:58 -04:00
Tingluo Huang
aab4aca8f7 Finish job when worker crashed with IOException. (#1239) 2021-08-03 16:21:39 -04:00
5 changed files with 46 additions and 51 deletions

View File

@@ -507,7 +507,20 @@ namespace GitHub.Runner.Listener
{
detailInfo = string.Join(Environment.NewLine, workerOutput);
Trace.Info($"Return code {returnCode} indicate worker encounter an unhandled exception or app crash, attach worker stdout/stderr to JobRequest result.");
await LogWorkerProcessUnhandledException(message, detailInfo);
var jobServer = HostContext.GetService<IJobServer>();
VssCredentials jobServerCredential = VssUtil.GetVssCredential(systemConnection);
VssConnection jobConnection = VssUtil.CreateConnection(systemConnection.Url, jobServerCredential);
await jobServer.ConnectAsync(jobConnection);
await LogWorkerProcessUnhandledException(jobServer, message, detailInfo);
// Finish the job with result 'Failed' if the worker's STDERR contains System.IO.IOException, since that typically means we are running out of disk space.
if (detailInfo.Contains(typeof(System.IO.IOException).ToString(), StringComparison.OrdinalIgnoreCase))
{
Trace.Info($"Finish job with result 'Failed' due to IOException.");
await ForceFailJob(jobServer, message);
}
}
TaskResult result = TaskResultUtil.TranslateFromReturnCode(returnCode);
@@ -915,53 +928,16 @@ namespace GitHub.Runner.Listener
}
// log an error issue to job level timeline record
private async Task LogWorkerProcessUnhandledException(Pipelines.AgentJobRequestMessage message, string errorMessage)
private async Task LogWorkerProcessUnhandledException(IJobServer jobServer, Pipelines.AgentJobRequestMessage message, string errorMessage)
{
try
{
var systemConnection = message.Resources.Endpoints.SingleOrDefault(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection));
ArgUtil.NotNull(systemConnection, nameof(systemConnection));
var jobServer = HostContext.GetService<IJobServer>();
VssCredentials jobServerCredential = VssUtil.GetVssCredential(systemConnection);
VssConnection jobConnection = VssUtil.CreateConnection(systemConnection.Url, jobServerCredential);
/* Below is the legacy 'OnPremises' code that is currently unused by the runner
ToDo: re-implement code as appropriate once GHES support is added.
// Make sure SystemConnection Url match Config Url base for OnPremises server
if (!message.Variables.ContainsKey(Constants.Variables.System.ServerType) ||
string.Equals(message.Variables[Constants.Variables.System.ServerType]?.Value, "OnPremises", StringComparison.OrdinalIgnoreCase))
{
try
{
Uri result = null;
Uri configUri = new Uri(_runnerSetting.ServerUrl);
if (Uri.TryCreate(new Uri(configUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped)), jobServerUrl.PathAndQuery, out result))
{
//replace the schema and host portion of messageUri with the host from the
//server URI (which was set at config time)
jobServerUrl = result;
}
}
catch (InvalidOperationException ex)
{
//cannot parse the Uri - not a fatal error
Trace.Error(ex);
}
catch (UriFormatException ex)
{
//cannot parse the Uri - not a fatal error
Trace.Error(ex);
}
} */
await jobServer.ConnectAsync(jobConnection);
var timeline = await jobServer.GetTimelineAsync(message.Plan.ScopeIdentifier, message.Plan.PlanType, message.Plan.PlanId, message.Timeline.Id, CancellationToken.None);
ArgUtil.NotNull(timeline, nameof(timeline));
TimelineRecord jobRecord = timeline.Records.FirstOrDefault(x => x.Id == message.JobId && x.RecordType == "Job");
ArgUtil.NotNull(jobRecord, nameof(jobRecord));
var unhandledExceptionIssue = new Issue() { Type = IssueType.Error, Message = errorMessage };
unhandledExceptionIssue.Data[Constants.Runner.InternalTelemetryIssueDataKey] = Constants.Runner.WorkerCrash;
jobRecord.ErrorCount++;
@@ -975,6 +951,21 @@ namespace GitHub.Runner.Listener
}
}
// Best-effort: raises a JobCompletedEvent with result 'Failed' for the given job request.
//   jobServer - IJobServer instance on which RaisePlanEventAsync is invoked.
//   message   - job request whose RequestId, JobId and Plan identify the job being failed.
// Any exception thrown while building or raising the event is traced and swallowed (never rethrown).
private async Task ForceFailJob(IJobServer jobServer, Pipelines.AgentJobRequestMessage message)
{
    try
    {
        // Completion event carrying a 'Failed' result for this request/job pair.
        var failedEvent = new JobCompletedEvent(message.RequestId, message.JobId, TaskResult.Failed);
        var plan = message.Plan;

        await jobServer.RaisePlanEventAsync<JobCompletedEvent>(
            plan.ScopeIdentifier,
            plan.PlanType,
            plan.PlanId,
            failedEvent,
            CancellationToken.None);
    }
    catch (Exception raiseError)
    {
        // Keep the failure visible in the trace log without propagating it to the caller.
        Trace.Error("Fail to raise JobCompletedEvent back to service.");
        Trace.Error(raiseError);
    }
}
private class WorkerDispatcher : IDisposable
{
public long RequestId { get; }

View File

@@ -272,7 +272,7 @@ namespace GitHub.Runner.Worker
return;
}
step.ExecutionContext = Root.CreatePostChild(step.DisplayName, IntraActionState);
step.ExecutionContext = Root.CreatePostChild(step.DisplayName, IntraActionState, ContextName);
Root.PostJobSteps.Push(step);
}
@@ -914,7 +914,7 @@ namespace GitHub.Runner.Worker
}
}
private IExecutionContext CreatePostChild(string displayName, Dictionary<string, string> intraActionState)
private IExecutionContext CreatePostChild(string displayName, Dictionary<string, string> intraActionState, string contextName)
{
if (!_expandedForPostJob)
{
@@ -924,7 +924,7 @@ namespace GitHub.Runner.Worker
}
var newGuid = Guid.NewGuid();
return CreateChild(newGuid, displayName, newGuid.ToString("N"), null, null, intraActionState, _childTimelineRecordOrder - Root.PostJobSteps.Count);
return CreateChild(newGuid, displayName, newGuid.ToString("N"), null, contextName, intraActionState, _childTimelineRecordOrder - Root.PostJobSteps.Count);
}
}

View File

@@ -153,7 +153,7 @@ namespace GitHub.Runner.Worker.Handlers
}
// Run embedded steps
await RunStepsAsync(embeddedSteps);
await RunStepsAsync(embeddedSteps, stage);
// Set outputs
ExecutionContext.ExpressionValues["inputs"] = inputsData;
@@ -212,7 +212,7 @@ namespace GitHub.Runner.Worker.Handlers
}
}
private async Task RunStepsAsync(List<IStep> embeddedSteps)
private async Task RunStepsAsync(List<IStep> embeddedSteps, ActionRunStage stage)
{
ArgUtil.NotNull(embeddedSteps, nameof(embeddedSteps));
@@ -388,9 +388,13 @@ namespace GitHub.Runner.Worker.Handlers
if (step.ExecutionContext.Result == TaskResult.Failed || step.ExecutionContext.Result == TaskResult.Canceled)
{
Trace.Info($"Update job result with current composite step result '{step.ExecutionContext.Result}'.");
ExecutionContext.Result = step.ExecutionContext.Result;
ExecutionContext.Root.Result = TaskResultUtil.MergeTaskResults(ExecutionContext.Root.Result, step.ExecutionContext.Result.Value);
ExecutionContext.Root.JobContext.Status = ExecutionContext.Root.Result?.ToActionResult();
ExecutionContext.Result = TaskResultUtil.MergeTaskResults(ExecutionContext.Result, step.ExecutionContext.Result.Value);
// We should keep running the remaining cleanup steps even if one of them fails
if (stage != ActionRunStage.Post)
{
break;
}
}
}
}

View File

@@ -312,7 +312,7 @@ namespace GitHub.Runner.Worker
{
ArgUtil.NotNull(actionStep, step.DisplayName);
Guid stepId = Guid.NewGuid();
actionStep.ExecutionContext = jobContext.CreateChild(stepId, actionStep.DisplayName, stepId.ToString("N"), null, null, intraActionStates[actionStep.Action.Id]);
actionStep.ExecutionContext = jobContext.CreateChild(stepId, actionStep.DisplayName, stepId.ToString("N"), null, actionStep.Action.ContextName, intraActionStates[actionStep.Action.Id]);
}
}

View File

@@ -70,7 +70,7 @@ namespace GitHub.Runner.Worker
public bool Retain_Default_Encoding => true;
#endif
public bool? Step_Debug => GetBoolean(Constants.Variables.Actions.StepDebug);
public bool? Step_Debug => true;
public string System_PhaseDisplayName => Get(Constants.Variables.System.PhaseDisplayName);