Compare commits

...

4 Commits

Author SHA1 Message Date
Ryan van Zeben
4b416898db Add in stall manager 2023-07-24 10:18:51 +00:00
JoannaaKL
496904c0b7 Fix feature flag location (#2703) 2023-07-19 12:19:42 +02:00
Tingluo Huang
b91ad56f92 Check connectivity for endpoints requested by service. (#2691) 2023-07-17 08:36:20 -04:00
Ferenc Hammerl
f25c9dfba3 Fixes if:cancelled() composite steps not running and normal composite steps not interrupting when the job is cancelled. (#2638)
* Set composite step's action_status when job is cancelled

Also make composite step inherit timeout from parent

* Fix eof line
2023-07-10 13:32:30 +02:00
16 changed files with 409 additions and 37 deletions

View File

@@ -158,6 +158,7 @@ namespace GitHub.Runner.Common
public static readonly string LogTemplateErrorsAsDebugMessages = "DistributedTask.LogTemplateErrorsAsDebugMessages";
public static readonly string UseContainerPathForTemplate = "DistributedTask.UseContainerPathForTemplate";
public static readonly string AllowRunnerContainerHooks = "DistributedTask.AllowRunnerContainerHooks";
public static readonly string AllowRunnerStallDetect = "DistributedTask.AllowRunnerStallDetect";
}
public static readonly string InternalTelemetryIssueDataKey = "_internal_telemetry";

View File

@@ -78,6 +78,7 @@ namespace GitHub.Runner.Worker
List<string> StepEnvironmentOverrides { get; }
ExecutionContext Root { get; }
ExecutionContext Parent { get; }
// Initialize
void InitializeJob(Pipelines.AgentJobRequestMessage message, CancellationToken token);
@@ -264,6 +265,14 @@ namespace GitHub.Runner.Worker
}
}
public ExecutionContext Parent
{
get
{
return _parentExecutionContext;
}
}
public JobContext JobContext
{
get
@@ -406,7 +415,7 @@ namespace GitHub.Runner.Worker
/// <summary>
/// An embedded execution context shares the same record ID, record name, logger,
/// and a linked cancellation token.
/// but NOT the cancellation token (just like workflow steps contexts - they don't share a token)
/// </summary>
public IExecutionContext CreateEmbeddedChild(
string scopeName,
@@ -416,7 +425,7 @@ namespace GitHub.Runner.Worker
Dictionary<string, string> intraActionState = null,
string siblingScopeName = null)
{
return Root.CreateChild(_record.Id, _record.Name, _record.Id.ToString("N"), scopeName, contextName, stage, logger: _logger, isEmbedded: true, cancellationTokenSource: CancellationTokenSource.CreateLinkedTokenSource(_cancellationTokenSource.Token), intraActionState: intraActionState, embeddedId: embeddedId, siblingScopeName: siblingScopeName);
return Root.CreateChild(_record.Id, _record.Name, _record.Id.ToString("N"), scopeName, contextName, stage, logger: _logger, isEmbedded: true, cancellationTokenSource: null, intraActionState: intraActionState, embeddedId: embeddedId, siblingScopeName: siblingScopeName);
}
public void Start(string currentOperation = null)
@@ -597,9 +606,33 @@ namespace GitHub.Runner.Worker
if (timeout != null)
{
_cancellationTokenSource.CancelAfter(timeout.Value);
m_timeoutStartedAt = DateTime.UtcNow;
m_timeout = timeout.Value;
}
}
DateTime? m_timeoutStartedAt;
TimeSpan? m_timeout;
public TimeSpan? GetRemainingTimeout()
{
if (m_timeoutStartedAt != null && m_timeout != null)
{
var elapsedSinceTimeoutSet = DateTime.UtcNow - m_timeoutStartedAt.Value;
var remainingTimeout = m_timeout.Value - elapsedSinceTimeoutSet;
if (remainingTimeout.Ticks > 0)
{
return remainingTimeout;
}
else
{
// there was a timeout and it has expired
return TimeSpan.Zero;
}
}
// no timeout was ever set
return null;
}
public void Progress(int percentage, string currentOperation = null)
{
if (percentage > 100 || percentage < 0)
@@ -1435,7 +1468,7 @@ namespace GitHub.Runner.Worker
private bool logTemplateErrorsAsDebugMessages()
{
if (_executionContext.Global.EnvironmentVariables.TryGetValue(Constants.Runner.Features.LogTemplateErrorsAsDebugMessages, out var logErrorsAsDebug))
if (_executionContext.Global.Variables.TryGetValue(Constants.Runner.Features.LogTemplateErrorsAsDebugMessages, out var logErrorsAsDebug))
{
return StringUtil.ConvertToBoolean(logErrorsAsDebug, defaultValue: false);
}

View File

@@ -11,5 +11,10 @@ namespace GitHub.Runner.Worker
var isContainerHooksPathSet = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable(Constants.Hooks.ContainerHooksPath));
return isContainerHookFeatureFlagSet && isContainerHooksPathSet;
}
public static bool IsStallDetectEnabled(Variables variables)
{
var isStallDetectFeatureFlagSet = variables?.GetBoolean(Constants.Runner.Features.AllowRunnerStallDetect) ?? false;
return isStallDetectFeatureFlagSet;
}
}
}

View File

@@ -310,6 +310,7 @@ namespace GitHub.Runner.Worker.Handlers
// Mark job as cancelled
ExecutionContext.Root.Result = TaskResult.Canceled;
ExecutionContext.Root.JobContext.Status = ExecutionContext.Root.Result?.ToActionResult();
step.ExecutionContext.SetGitHubContext("action_status", (ExecutionContext.Root.Result?.ToActionResult() ?? ActionResult.Cancelled).ToString().ToLowerInvariant());
step.ExecutionContext.Debug($"Re-evaluate condition on job cancellation for step: '{step.DisplayName}'.");
var conditionReTestTraceWriter = new ConditionTraceWriter(Trace, null); // host tracing only
@@ -420,6 +421,8 @@ namespace GitHub.Runner.Worker.Handlers
{
Trace.Info($"Starting: {step.DisplayName}");
step.ExecutionContext.Debug($"Starting: {step.DisplayName}");
// composite steps inherit the timeout from the parent, set by https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepstimeout-minutes
step.ExecutionContext.SetTimeout(step.ExecutionContext.Parent.GetRemainingTimeout());
await Common.Util.EncodingUtil.SetEncoding(HostContext, Trace, step.ExecutionContext.CancellationToken);

View File

@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@@ -240,9 +240,11 @@ namespace GitHub.Runner.Worker.Handlers
}
else
{
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager))
{
stallManager?.Initialize();
var runExitCode = await dockerManager.DockerRun(ExecutionContext, container, stdoutManager.OnDataReceived, stderrManager.OnDataReceived);
ExecutionContext.Debug($"Docker Action run completed with exit code {runExitCode}");
if (runExitCode != 0)

View File

@@ -159,12 +159,15 @@ namespace GitHub.Runner.Worker.Handlers
ExecutionContext.Global.Variables.Set("Node12ActionsWarnings", StringUtil.ConvertToJson(warningActions));
}
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
stallManager?.Initialize();
// Execute the process. Exit code 0 should always be returned.
// A non-zero exit code indicates infrastructural failure.
// Task failure should be communicated over STDOUT using ## commands.

View File

@@ -26,12 +26,14 @@ namespace GitHub.Runner.Worker.Handlers
private IssueMatcher[] _matchers = Array.Empty<IssueMatcher>();
// Mapping that indicates whether a directory belongs to the workflow repository
private readonly Dictionary<string, string> _directoryMap = new();
private StallManager _stallManager;
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null)
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null, StallManager stallManager = null)
{
_executionContext = executionContext;
_commandManager = commandManager;
_container = container ?? executionContext.Global.Container;
_stallManager = stallManager;
// Recursion failsafe (test override)
var failsafeString = Environment.GetEnvironmentVariable("RUNNER_TEST_GET_REPOSITORY_PATH_FAILSAFE");
@@ -76,6 +78,10 @@ namespace GitHub.Runner.Worker.Handlers
public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
{
if (_stallManager != null)
{
_stallManager.OnDataReceived(sender, e);
}
var line = e.Data;
// ## commands

View File

@@ -43,11 +43,14 @@ namespace GitHub.Runner.Worker.Handlers
// Make sure only particular task get run as runner plugin.
var runnerPlugin = HostContext.GetService<IRunnerPluginManager>();
using (var outputManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager outputManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
ActionCommandManager.EnablePluginInternalCommand();
try
{
stallManager?.Initialize();
await runnerPlugin.RunPluginActionAsync(ExecutionContext, plugin, Inputs, Environment, RuntimeVariables, outputManager.OnDataReceived);
}
finally

View File

@@ -321,13 +321,15 @@ namespace GitHub.Runner.Worker.Handlers
ExecutionContext.Debug($"{fileName} {arguments}");
Inputs.TryGetValue("standardInInput", out var standardInInput);
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
// Execute
stallManager?.Initialize();
int exitCode = await StepHost.ExecuteAsync(ExecutionContext,
workingDirectory: StepHost.ResolvePathForStepHost(ExecutionContext, workingDirectory),
fileName: fileName,

View File

@@ -0,0 +1,70 @@
using System;
using System.Timers;
using GitHub.Runner.Common;
using GitHub.Runner.Sdk;
namespace GitHub.Runner.Worker.Handlers
{
[ServiceLocator(Default = typeof(TimerAdapter))]
public interface ITimer
{
void Start();
void Stop();
double Interval { get; set; }
event ElapsedEventHandler Elapsed;
bool AutoReset { get; set; }
void Dispose();
}
public class TimerAdapter : Timer, ITimer { }
public sealed class StallManager : IDisposable
{
public static TimeSpan DefaultStallInterval = TimeSpan.FromMinutes(30);
private readonly IExecutionContext _executionContext;
private readonly double _interval;
private ITimer _timer { get; set; }
private int _intervalsElapsedWhileStalled = 0;
public StallManager(IExecutionContext executionContext, double interval, ITimer timer)
{
_executionContext = executionContext;
_interval = interval;
_timer = timer;
_timer.Interval = _interval;
_timer.Elapsed += TriggerWarning;
}
public StallManager(IExecutionContext executionContext, double interval) : this(executionContext, interval, new TimerAdapter()) { }
public StallManager(IExecutionContext executionContext) : this(executionContext, StallManager.DefaultStallInterval.TotalMilliseconds) { }
public void Initialize()
{
this.OnDataReceived(null, null);
}
public void Dispose()
{
try
{
_timer.Dispose();
}
catch { }
}
public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
{
_intervalsElapsedWhileStalled = 0;
_timer.Stop();
_timer.Start();
}
private void TriggerWarning(object source, ElapsedEventArgs e)
{
_intervalsElapsedWhileStalled++;
_executionContext.Warning($"No output has been detected in the last {TimeSpan.FromMilliseconds(_intervalsElapsedWhileStalled * _interval).TotalMinutes} minutes and the process has not yet exited. This step may have stalled and might require some investigation.");
}
}
}

View File

@@ -4,6 +4,7 @@ using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Runtime.Serialization;
using System.Threading;
using System.Threading.Tasks;
@@ -15,6 +16,7 @@ using GitHub.DistributedTask.WebApi;
using GitHub.Runner.Common;
using GitHub.Runner.Common.Util;
using GitHub.Runner.Sdk;
using GitHub.Services.Common;
using Pipelines = GitHub.DistributedTask.Pipelines;
namespace GitHub.Runner.Worker
@@ -34,12 +36,13 @@ namespace GitHub.Runner.Worker
public interface IJobExtension : IRunnerService
{
Task<List<IStep>> InitializeJob(IExecutionContext jobContext, Pipelines.AgentJobRequestMessage message);
void FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc);
Task FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc);
}
public sealed class JobExtension : RunnerService, IJobExtension
{
private readonly HashSet<string> _existingProcesses = new(StringComparer.OrdinalIgnoreCase);
private readonly List<Task<string>> _connectivityCheckTasks = new();
private bool _processCleanup;
private string _processLookupId = $"github_{Guid.NewGuid()}";
private CancellationTokenSource _diskSpaceCheckToken = new();
@@ -428,6 +431,22 @@ namespace GitHub.Runner.Worker
_diskSpaceCheckTask = CheckDiskSpaceAsync(context, _diskSpaceCheckToken.Token);
}
// Check server connectivity in background
ServiceEndpoint systemConnection = message.Resources.Endpoints.Single(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase));
if (systemConnection.Data.TryGetValue("ConnectivityChecks", out var connectivityChecksPayload) &&
!string.IsNullOrEmpty(connectivityChecksPayload))
{
Trace.Info($"Start checking server connectivity.");
var checkUrls = StringUtil.ConvertFromJson<List<string>>(connectivityChecksPayload);
if (checkUrls?.Count > 0)
{
foreach (var checkUrl in checkUrls)
{
_connectivityCheckTasks.Add(CheckConnectivity(checkUrl));
}
}
}
return steps;
}
catch (OperationCanceledException ex) when (jobContext.CancellationToken.IsCancellationRequested)
@@ -472,7 +491,7 @@ namespace GitHub.Runner.Worker
return reference;
}
public void FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc)
public async Task FinalizeJob(IExecutionContext jobContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc)
{
Trace.Entering();
ArgUtil.NotNull(jobContext, nameof(jobContext));
@@ -649,6 +668,28 @@ namespace GitHub.Runner.Worker
{
_diskSpaceCheckToken.Cancel();
}
// Collect server connectivity check result
if (_connectivityCheckTasks.Count > 0)
{
try
{
Trace.Info($"Wait for all connectivity checks to finish.");
await Task.WhenAll(_connectivityCheckTasks);
foreach (var check in _connectivityCheckTasks)
{
var result = await check;
Trace.Info($"Connectivity check result: {result}");
context.Global.JobTelemetry.Add(new JobTelemetry() { Type = JobTelemetryType.ConnectivityCheck, Message = result });
}
}
catch (Exception ex)
{
Trace.Error($"Fail to check server connectivity.");
Trace.Error(ex);
context.Global.JobTelemetry.Add(new JobTelemetry() { Type = JobTelemetryType.ConnectivityCheck, Message = $"Fail to check server connectivity. {ex.Message}" });
}
}
}
catch (Exception ex)
{
@@ -664,6 +705,37 @@ namespace GitHub.Runner.Worker
}
}
private async Task<string> CheckConnectivity(string endpointUrl)
{
Trace.Info($"Check server connectivity for {endpointUrl}.");
string result = string.Empty;
using (var timeoutTokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(5)))
{
try
{
using (var httpClientHandler = HostContext.CreateHttpClientHandler())
using (var httpClient = new HttpClient(httpClientHandler))
{
httpClient.DefaultRequestHeaders.UserAgent.AddRange(HostContext.UserAgents);
var response = await httpClient.GetAsync(endpointUrl, timeoutTokenSource.Token);
result = $"{endpointUrl}: {response.StatusCode}";
}
}
catch (Exception ex) when (ex is OperationCanceledException && timeoutTokenSource.IsCancellationRequested)
{
Trace.Error($"Request timeout during connectivity check: {ex}");
result = $"{endpointUrl}: timeout";
}
catch (Exception ex)
{
Trace.Error($"Catch exception during connectivity check: {ex}");
result = $"{endpointUrl}: {ex.Message}";
}
}
return result;
}
private async Task CheckDiskSpaceAsync(IExecutionContext context, CancellationToken token)
{
while (!token.IsCancellationRequested)

View File

@@ -229,7 +229,7 @@ namespace GitHub.Runner.Worker
finally
{
Trace.Info("Finalize job.");
jobExtension.FinalizeJob(jobContext, message, jobStartTimeUtc);
await jobExtension.FinalizeJob(jobContext, message, jobStartTimeUtc);
}
Trace.Info($"Job result after all job steps finish: {jobContext.Result ?? TaskResult.Succeeded}");

View File

@@ -9,5 +9,8 @@ namespace GitHub.DistributedTask.WebApi
[EnumMember]
ActionCommand = 1,
[EnumMember]
ConnectivityCheck = 2,
}
}

View File

@@ -1,13 +1,13 @@
using GitHub.DistributedTask.WebApi;
using GitHub.Runner.Worker;
using Moq;
using System;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using Xunit;
using System.Threading;
using System.Threading.Tasks;
using GitHub.DistributedTask.WebApi;
using GitHub.Runner.Worker;
using Moq;
using Xunit;
using Pipelines = GitHub.DistributedTask.Pipelines;
namespace GitHub.Runner.Common.Tests.Worker
@@ -105,6 +105,18 @@ namespace GitHub.Runner.Common.Tests.Worker
github["repository"] = new Pipelines.ContextData.StringContextData("actions/runner");
github["secret_source"] = new Pipelines.ContextData.StringContextData("Actions");
_message.ContextData.Add("github", github);
_message.Resources.Endpoints.Add(new ServiceEndpoint()
{
Name = WellKnownServiceEndpointNames.SystemVssConnection,
Url = new Uri("https://pipelines.actions.githubusercontent.com"),
Authorization = new EndpointAuthorization()
{
Scheme = "Test",
Parameters = {
{"AccessToken", "token"}
}
},
});
hc.SetSingleton(_actionManager.Object);
hc.SetSingleton(_config.Object);
@@ -231,7 +243,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void UploadDiganosticLogIfEnvironmentVariableSet()
public async Task UploadDiganosticLogIfEnvironmentVariableSet()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -244,7 +256,7 @@ namespace GitHub.Runner.Common.Tests.Worker
_jobEc.Initialize(hc);
_jobEc.InitializeJob(_message, _tokenSource.Token);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
_diagnosticLogManager.Verify(x =>
x.UploadDiagnosticLogs(
@@ -259,7 +271,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void DontUploadDiagnosticLogIfEnvironmentVariableFalse()
public async Task DontUploadDiagnosticLogIfEnvironmentVariableFalse()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -272,7 +284,7 @@ namespace GitHub.Runner.Common.Tests.Worker
_jobEc.Initialize(hc);
_jobEc.InitializeJob(_message, _tokenSource.Token);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
_diagnosticLogManager.Verify(x =>
x.UploadDiagnosticLogs(
@@ -287,14 +299,14 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void DontUploadDiagnosticLogIfEnvironmentVariableMissing()
public async Task DontUploadDiagnosticLogIfEnvironmentVariableMissing()
{
using (TestHostContext hc = CreateTestContext())
{
var jobExtension = new JobExtension();
jobExtension.Initialize(hc);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
_diagnosticLogManager.Verify(x =>
x.UploadDiagnosticLogs(
@@ -309,7 +321,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void EnsureFinalizeJobRunsIfMessageHasNoEnvironmentUrl()
public async Task EnsureFinalizeJobRunsIfMessageHasNoEnvironmentUrl()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -322,7 +334,7 @@ namespace GitHub.Runner.Common.Tests.Worker
_jobEc.Initialize(hc);
_jobEc.InitializeJob(_message, _tokenSource.Token);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
Assert.Equal(TaskResult.Succeeded, _jobEc.Result);
}
@@ -331,7 +343,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void EnsureFinalizeJobHandlesNullEnvironmentUrl()
public async Task EnsureFinalizeJobHandlesNullEnvironmentUrl()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -347,7 +359,7 @@ namespace GitHub.Runner.Common.Tests.Worker
_jobEc.Initialize(hc);
_jobEc.InitializeJob(_message, _tokenSource.Token);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
Assert.Equal(TaskResult.Succeeded, _jobEc.Result);
}
@@ -356,7 +368,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void EnsureFinalizeJobHandlesNullEnvironment()
public async Task EnsureFinalizeJobHandlesNullEnvironment()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -369,7 +381,7 @@ namespace GitHub.Runner.Common.Tests.Worker
_jobEc.Initialize(hc);
_jobEc.InitializeJob(_message, _tokenSource.Token);
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
Assert.Equal(TaskResult.Succeeded, _jobEc.Result);
}
@@ -397,7 +409,7 @@ namespace GitHub.Runner.Common.Tests.Worker
var hookStart = result.First() as JobExtensionRunner;
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
Assert.Equal(Constants.Hooks.JobStartedStepName, hookStart.DisplayName);
Assert.Equal(Constants.Hooks.JobCompletedStepName, (_jobEc.PostJobSteps.Last() as JobExtensionRunner).DisplayName);
@@ -410,7 +422,7 @@ namespace GitHub.Runner.Common.Tests.Worker
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void EnsureNoPreAndPostHookSteps()
public async Task EnsureNoPreAndPostHookSteps()
{
using (TestHostContext hc = CreateTestContext())
{
@@ -425,7 +437,7 @@ namespace GitHub.Runner.Common.Tests.Worker
var x = _jobEc.JobSteps;
jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
await jobExtension.FinalizeJob(_jobEc, _message, DateTime.UtcNow);
Assert.Equal(TaskResult.Succeeded, _jobEc.Result);
Assert.Equal(0, _jobEc.PostJobSteps.Count);

View File

@@ -1014,7 +1014,8 @@ namespace GitHub.Runner.Common.Tests.Worker
return false;
});
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer);
StallManager stallManager = new StallManager(_executionContext.Object);
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer, stallManager);
return hostContext;
}

View File

@@ -0,0 +1,156 @@
using System;
using System.Timers;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using GitHub.Runner.Worker;
using GitHub.Runner.Worker.Container;
using GitHub.Runner.Worker.Handlers;
using Moq;
using Xunit;
using DTWebApi = GitHub.DistributedTask.WebApi;
using GitHub.Runner.Common.Util;
using GitHub.DistributedTask.WebApi;
using System.Diagnostics;
namespace GitHub.Runner.Common.Tests.Worker
{
public class MockTimer : ITimer
{
public bool _started = false;
public bool _stopped = false;
public bool _reset = false;
public double Interval { get; set; }
public event ElapsedEventHandler Elapsed;
public bool AutoReset { get; set; }
public MockTimer()
{
Interval = 1;
}
public void Dispose() { }
public void Start()
{
_started = true;
if (_stopped)
{
_stopped = false;
_reset = true;
}
}
public void Stop()
{
_reset = false;
_started = false;
_stopped = true;
}
public void TimeElapsed()
{
this.Elapsed.Invoke(this, new EventArgs() as ElapsedEventArgs);
}
}
public sealed class StallManagerL0
{
private Mock<IExecutionContext> _executionContext;
private List<Tuple<DTWebApi.Issue, string>> _issues;
private Variables _variables;
private TestHostContext Setup(
[CallerMemberName] string name = "",
ContainerInfo jobContainer = null,
ContainerInfo stepContainer = null)
{
var hostContext = new TestHostContext(this, name);
_executionContext = new Mock<IExecutionContext>();
_issues = new List<Tuple<DTWebApi.Issue, string>>();
// Variables to test for secret scrubbing & FF options
_variables = new Variables(hostContext, new Dictionary<string, VariableValue>
{
{ "DistributedTask.AllowRunnerStallDetect", new VariableValue("true", true) },
});
_executionContext.Setup(x => x.Global)
.Returns(new GlobalContext
{
Container = jobContainer,
Variables = _variables,
WriteDebug = true,
});
_executionContext.Setup(x => x.AddIssue(It.IsAny<DTWebApi.Issue>(), It.IsAny<ExecutionContextLogOptions>()))
.Callback((DTWebApi.Issue issue, ExecutionContextLogOptions logOptions) =>
{
var resolvedMessage = issue.Message;
if (logOptions.WriteToLog && !string.IsNullOrEmpty(logOptions.LogMessageOverride))
{
resolvedMessage = logOptions.LogMessageOverride;
}
_issues.Add(new(issue, resolvedMessage));
});
return hostContext;
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void OutputWarningMessageOnTimeElapsed()
{
MockTimer timer = new MockTimer();
using (Setup())
using (StallManager manager = new StallManager(_executionContext.Object, TimeSpan.FromMinutes(10).TotalMilliseconds, timer))
{
timer.TimeElapsed();
Assert.Equal(1, _issues.Count);
Assert.Equal("No output has been detected in the last 10 minutes and the process has not yet exited. This step may have stalled and might require some investigation.", _issues[0].Item1.Message);
Assert.Equal(DTWebApi.IssueType.Warning, _issues[0].Item1.Type);
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public void ValidateTimerResetOnNewMessage()
{
MockTimer timer = new MockTimer();
using (Setup())
using (StallManager manager = new StallManager(_executionContext.Object, TimeSpan.FromMinutes(10).TotalMilliseconds, timer))
{
// Trigger 2 elapsed
timer.TimeElapsed();
timer.TimeElapsed();
// Should have triggered 2 warnings
Assert.Equal(2, _issues.Count);
Assert.Equal("No output has been detected in the last 10 minutes and the process has not yet exited. This step may have stalled and might require some investigation.", _issues[0].Item1.Message);
Assert.Equal("No output has been detected in the last 20 minutes and the process has not yet exited. This step may have stalled and might require some investigation.", _issues[1].Item1.Message);
Assert.Equal(DTWebApi.IssueType.Warning, _issues[0].Item1.Type);
Assert.Equal(DTWebApi.IssueType.Warning, _issues[1].Item1.Type);
// Should reset timer
manager.OnDataReceived(null, null);
Assert.True(timer._reset);
Assert.Equal(2, _issues.Count);
// Trigger another elapsed interval
timer.TimeElapsed();
// Timer should have reset and one new warning should have been added
Assert.Equal(3, _issues.Count);
Assert.Equal("No output has been detected in the last 10 minutes and the process has not yet exited. This step may have stalled and might require some investigation.", _issues[2].Item1.Message);
Assert.Equal(DTWebApi.IssueType.Warning, _issues[2].Item1.Type);
}
}
}
}