mirror of
https://github.com/actions/runner.git
synced 2025-12-10 12:36:23 +00:00
Compare commits
1 Commits
v2.312.0
...
users/vanz
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b416898db |
@@ -158,6 +158,7 @@ namespace GitHub.Runner.Common
|
||||
public static readonly string LogTemplateErrorsAsDebugMessages = "DistributedTask.LogTemplateErrorsAsDebugMessages";
|
||||
public static readonly string UseContainerPathForTemplate = "DistributedTask.UseContainerPathForTemplate";
|
||||
public static readonly string AllowRunnerContainerHooks = "DistributedTask.AllowRunnerContainerHooks";
|
||||
public static readonly string AllowRunnerStallDetect = "DistributedTask.AllowRunnerStallDetect";
|
||||
}
|
||||
|
||||
public static readonly string InternalTelemetryIssueDataKey = "_internal_telemetry";
|
||||
|
||||
@@ -11,5 +11,10 @@ namespace GitHub.Runner.Worker
|
||||
var isContainerHooksPathSet = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable(Constants.Hooks.ContainerHooksPath));
|
||||
return isContainerHookFeatureFlagSet && isContainerHooksPathSet;
|
||||
}
|
||||
/// <summary>
/// Reports whether the stall-detection feature flag is switched on.
/// </summary>
/// <param name="variables">Variables to read the flag from; may be null.</param>
/// <returns>
/// The value of <c>Constants.Runner.Features.AllowRunnerStallDetect</c>, or
/// <c>false</c> when <paramref name="variables"/> is null or the lookup yields no value.
/// </returns>
public static bool IsStallDetectEnabled(Variables variables)
{
    // No variables means there is nothing to read the flag from.
    if (variables == null)
    {
        return false;
    }

    bool? stallDetectFlag = variables.GetBoolean(Constants.Runner.Features.AllowRunnerStallDetect);
    return stallDetectFlag ?? false;
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
using System;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
@@ -240,9 +240,11 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
}
|
||||
else
|
||||
{
|
||||
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
|
||||
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
|
||||
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
|
||||
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager),
|
||||
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager))
|
||||
{
|
||||
stallManager?.Initialize();
|
||||
var runExitCode = await dockerManager.DockerRun(ExecutionContext, container, stdoutManager.OnDataReceived, stderrManager.OnDataReceived);
|
||||
ExecutionContext.Debug($"Docker Action run completed with exit code {runExitCode}");
|
||||
if (runExitCode != 0)
|
||||
|
||||
@@ -159,12 +159,15 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
ExecutionContext.Global.Variables.Set("Node12ActionsWarnings", StringUtil.ConvertToJson(warningActions));
|
||||
}
|
||||
|
||||
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
|
||||
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
|
||||
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
|
||||
|
||||
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
|
||||
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
|
||||
{
|
||||
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
|
||||
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
|
||||
|
||||
stallManager?.Initialize();
|
||||
// Execute the process. Exit code 0 should always be returned.
|
||||
// A non-zero exit code indicates infrastructural failure.
|
||||
// Task failure should be communicated over STDOUT using ## commands.
|
||||
|
||||
@@ -26,12 +26,14 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
private IssueMatcher[] _matchers = Array.Empty<IssueMatcher>();
|
||||
// Mapping that indicates whether a directory belongs to the workflow repository
|
||||
private readonly Dictionary<string, string> _directoryMap = new();
|
||||
private StallManager _stallManager;
|
||||
|
||||
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null)
|
||||
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null, StallManager stallManager = null)
|
||||
{
|
||||
_executionContext = executionContext;
|
||||
_commandManager = commandManager;
|
||||
_container = container ?? executionContext.Global.Container;
|
||||
_stallManager = stallManager;
|
||||
|
||||
// Recursion failsafe (test override)
|
||||
var failsafeString = Environment.GetEnvironmentVariable("RUNNER_TEST_GET_REPOSITORY_PATH_FAILSAFE");
|
||||
@@ -76,6 +78,10 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
|
||||
public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
|
||||
{
|
||||
if (_stallManager != null)
|
||||
{
|
||||
_stallManager.OnDataReceived(sender, e);
|
||||
}
|
||||
var line = e.Data;
|
||||
|
||||
// ## commands
|
||||
|
||||
@@ -43,11 +43,14 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
|
||||
// Make sure only particular task get run as runner plugin.
|
||||
var runnerPlugin = HostContext.GetService<IRunnerPluginManager>();
|
||||
using (var outputManager = new OutputManager(ExecutionContext, ActionCommandManager))
|
||||
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
|
||||
|
||||
using (OutputManager outputManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
|
||||
{
|
||||
ActionCommandManager.EnablePluginInternalCommand();
|
||||
try
|
||||
{
|
||||
stallManager?.Initialize();
|
||||
await runnerPlugin.RunPluginActionAsync(ExecutionContext, plugin, Inputs, Environment, RuntimeVariables, outputManager.OnDataReceived);
|
||||
}
|
||||
finally
|
||||
|
||||
@@ -321,13 +321,15 @@ namespace GitHub.Runner.Worker.Handlers
|
||||
ExecutionContext.Debug($"{fileName} {arguments}");
|
||||
|
||||
Inputs.TryGetValue("standardInInput", out var standardInInput);
|
||||
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
|
||||
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
|
||||
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
|
||||
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
|
||||
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
|
||||
{
|
||||
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
|
||||
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
|
||||
|
||||
// Execute
|
||||
stallManager?.Initialize();
|
||||
int exitCode = await StepHost.ExecuteAsync(ExecutionContext,
|
||||
workingDirectory: StepHost.ResolvePathForStepHost(ExecutionContext, workingDirectory),
|
||||
fileName: fileName,
|
||||
|
||||
70
src/Runner.Worker/Handlers/StallManager.cs
Normal file
70
src/Runner.Worker/Handlers/StallManager.cs
Normal file
@@ -0,0 +1,70 @@
|
||||
using System;
|
||||
using System.Timers;
|
||||
using GitHub.Runner.Common;
|
||||
using GitHub.Runner.Sdk;
|
||||
|
||||
namespace GitHub.Runner.Worker.Handlers
|
||||
{
|
||||
/// <summary>
/// Minimal timer abstraction so that <see cref="StallManager"/> can be driven by a
/// fake timer in tests. The member set mirrors the members of
/// <see cref="System.Timers.Timer"/> that the stall manager uses.
/// </summary>
/// <remarks>
/// The interface extends <see cref="IDisposable"/> rather than re-declaring
/// <c>Dispose()</c>, so implementations can be used in <c>using</c> statements and
/// passed wherever an <see cref="IDisposable"/> is expected. Existing implementers
/// that expose a public <c>Dispose()</c> method keep compiling unchanged.
/// </remarks>
[ServiceLocator(Default = typeof(TimerAdapter))]
public interface ITimer : IDisposable
{
    /// <summary>Starts (or restarts) raising <see cref="Elapsed"/>.</summary>
    void Start();

    /// <summary>Stops raising <see cref="Elapsed"/>.</summary>
    void Stop();

    /// <summary>Time between <see cref="Elapsed"/> events, in milliseconds.</summary>
    double Interval { get; set; }

    /// <summary>Raised each time the interval elapses.</summary>
    event ElapsedEventHandler Elapsed;

    /// <summary>
    /// Whether <see cref="Elapsed"/> is raised repeatedly (<c>true</c>) or only once (<c>false</c>).
    /// </summary>
    bool AutoReset { get; set; }
}
|
||||
|
||||
/// <summary>
/// Production <see cref="ITimer"/>: a <see cref="Timer"/> that satisfies the interface
/// purely through its inherited members, so no additional code is needed here.
/// </summary>
public class TimerAdapter : Timer, ITimer
{
}
|
||||
|
||||
/// <summary>
/// Watches a step's output stream and emits a warning on the execution context each
/// time a full interval passes without any output being reported.
/// </summary>
/// <remarks>
/// A single instance may be fed by more than one output callback (for example one for
/// stdout and one for stderr), and the timer raises <c>Elapsed</c> independently of
/// those callbacks, so all mutable state is guarded by a private lock.
/// </remarks>
public sealed class StallManager : IDisposable
{
    /// <summary>Interval used when the caller does not supply one.</summary>
    public static TimeSpan DefaultStallInterval = TimeSpan.FromMinutes(30);

    private readonly IExecutionContext _executionContext;
    private readonly double _interval;
    private readonly ITimer _timer;

    // Serializes timer restarts, counter updates and disposal.
    private readonly object _gate = new object();

    // Number of consecutive intervals that elapsed without any output.
    private int _intervalsElapsedWhileStalled = 0;
    private bool _disposed;

    /// <summary>
    /// Creates a stall manager on top of an explicit timer (used by tests to inject a fake).
    /// </summary>
    /// <param name="executionContext">Context that receives the stall warnings.</param>
    /// <param name="interval">Quiet period, in milliseconds, after which a warning is emitted.</param>
    /// <param name="timer">Timer to drive the detection; it is disposed together with this instance.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="executionContext"/> or <paramref name="timer"/> is null.
    /// </exception>
    public StallManager(IExecutionContext executionContext, double interval, ITimer timer)
    {
        _executionContext = executionContext ?? throw new ArgumentNullException(nameof(executionContext));
        _timer = timer ?? throw new ArgumentNullException(nameof(timer));
        _interval = interval;

        _timer.Interval = _interval;
        _timer.Elapsed += TriggerWarning;
    }

    /// <summary>Creates a stall manager with the given interval (milliseconds) and a real timer.</summary>
    public StallManager(IExecutionContext executionContext, double interval) : this(executionContext, interval, new TimerAdapter()) { }

    /// <summary>Creates a stall manager using <see cref="DefaultStallInterval"/> and a real timer.</summary>
    public StallManager(IExecutionContext executionContext) : this(executionContext, StallManager.DefaultStallInterval.TotalMilliseconds) { }

    /// <summary>
    /// Starts the stall timer. Implemented as a synthetic "data received" notification
    /// so that starting and resetting share one code path.
    /// </summary>
    public void Initialize()
    {
        this.OnDataReceived(null, null);
    }

    /// <summary>
    /// Detaches from the timer, stops it and disposes it. Safe to call more than once.
    /// Cleanup is best-effort: failures while tearing down the timer are ignored.
    /// </summary>
    public void Dispose()
    {
        lock (_gate)
        {
            if (_disposed)
            {
                return;
            }

            _disposed = true;

            try
            {
                // Unsubscribe and stop first so no further warnings are produced,
                // then release the timer itself.
                _timer.Elapsed -= TriggerWarning;
                _timer.Stop();
                _timer.Dispose();
            }
            catch (Exception)
            {
                // Deliberately swallowed: disposal must never fail the step.
            }
        }
    }

    /// <summary>
    /// Records that output was observed: clears the stalled-interval counter and
    /// restarts the timer. Both arguments are ignored and may be null.
    /// </summary>
    /// <param name="sender">Unused.</param>
    /// <param name="e">Unused.</param>
    public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
    {
        lock (_gate)
        {
            // Output arriving after disposal must not restart a disposed timer.
            if (_disposed)
            {
                return;
            }

            _intervalsElapsedWhileStalled = 0;
            _timer.Stop();
            _timer.Start();
        }
    }

    /// <summary>
    /// Timer callback: counts one more silent interval and reports the accumulated
    /// quiet time as a warning.
    /// </summary>
    private void TriggerWarning(object source, ElapsedEventArgs e)
    {
        int elapsedIntervals;
        lock (_gate)
        {
            // An Elapsed event can still be in flight after the timer was stopped.
            if (_disposed)
            {
                return;
            }

            elapsedIntervals = ++_intervalsElapsedWhileStalled;
        }

        // Format with the invariant culture so the message does not depend on the
        // machine's regional settings when the value is fractional.
        var minutes = TimeSpan.FromMilliseconds(elapsedIntervals * _interval).TotalMinutes
            .ToString(System.Globalization.CultureInfo.InvariantCulture);

        // Emit outside the lock so a slow logger cannot block output callbacks.
        _executionContext.Warning($"No output has been detected in the last {minutes} minutes and the process has not yet exited. This step may have stalled and might require some investigation.");
    }
}
|
||||
}
|
||||
@@ -1014,7 +1014,8 @@ namespace GitHub.Runner.Common.Tests.Worker
|
||||
return false;
|
||||
});
|
||||
|
||||
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer);
|
||||
StallManager stallManager = new StallManager(_executionContext.Object);
|
||||
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer, stallManager);
|
||||
return hostContext;
|
||||
}
|
||||
|
||||
|
||||
156
src/Test/L0/Worker/StallManagerL0.cs
Normal file
156
src/Test/L0/Worker/StallManagerL0.cs
Normal file
@@ -0,0 +1,156 @@
|
||||
using System;
|
||||
using System.Timers;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.CompilerServices;
|
||||
using GitHub.Runner.Worker;
|
||||
using GitHub.Runner.Worker.Container;
|
||||
using GitHub.Runner.Worker.Handlers;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
using DTWebApi = GitHub.DistributedTask.WebApi;
|
||||
using GitHub.Runner.Common.Util;
|
||||
using GitHub.DistributedTask.WebApi;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace GitHub.Runner.Common.Tests.Worker
|
||||
{
|
||||
|
||||
/// <summary>
/// Hand-rolled <see cref="ITimer"/> for tests. It never fires on its own; tests call
/// <see cref="TimeElapsed"/> to simulate an interval passing, and inspect the public
/// flags to see how the timer was driven.
/// </summary>
public class MockTimer : ITimer
{
    // True after Start(), cleared by Stop().
    public bool _started = false;

    // True after Stop(), cleared by a subsequent Start().
    public bool _stopped = false;

    // True once Start() has been called while the timer was stopped (i.e. a restart).
    public bool _reset = false;

    public double Interval { get; set; }
    public event ElapsedEventHandler Elapsed;
    public bool AutoReset { get; set; }

    public MockTimer()
    {
        Interval = 1;
    }

    public void Dispose() { }

    public void Start()
    {
        _started = true;
        if (_stopped)
        {
            _stopped = false;
            _reset = true;
        }
    }

    public void Stop()
    {
        _reset = false;
        _started = false;
        _stopped = true;
    }

    /// <summary>
    /// Simulates one interval elapsing by raising <see cref="Elapsed"/>.
    /// Does nothing when no handler is subscribed.
    /// </summary>
    public void TimeElapsed()
    {
        // The event args are passed as null. The previous
        // "new EventArgs() as ElapsedEventArgs" also always produced null, because a
        // plain EventArgs instance is not an ElapsedEventArgs.
        Elapsed?.Invoke(this, null);
    }
}
|
||||
|
||||
/// <summary>
/// L0 tests for <see cref="StallManager"/>, driven by a <see cref="MockTimer"/> so that
/// no real time has to pass.
/// </summary>
public sealed class StallManagerL0
{
    // Expected warning texts for one and two elapsed 10-minute intervals.
    private const string TenMinuteWarning = "No output has been detected in the last 10 minutes and the process has not yet exited. This step may have stalled and might require some investigation.";
    private const string TwentyMinuteWarning = "No output has been detected in the last 20 minutes and the process has not yet exited. This step may have stalled and might require some investigation.";

    private Mock<IExecutionContext> _executionContext;
    private List<Tuple<DTWebApi.Issue, string>> _issues;
    private Variables _variables;

    /// <summary>
    /// Builds the host context and a mocked execution context whose AddIssue calls are
    /// captured in <c>_issues</c>.
    /// </summary>
    private TestHostContext Setup(
        [CallerMemberName] string name = "",
        ContainerInfo jobContainer = null,
        ContainerInfo stepContainer = null)
    {
        var testHost = new TestHostContext(this, name);
        _executionContext = new Mock<IExecutionContext>();
        _issues = new List<Tuple<DTWebApi.Issue, string>>();

        // Variables carrying the stall-detect feature flag
        _variables = new Variables(testHost, new Dictionary<string, VariableValue>
        {
            ["DistributedTask.AllowRunnerStallDetect"] = new VariableValue("true", true),
        });

        var globalContext = new GlobalContext
        {
            Container = jobContainer,
            Variables = _variables,
            WriteDebug = true,
        };
        _executionContext.Setup(x => x.Global).Returns(globalContext);

        // Record every issue together with the message that would reach the log.
        _executionContext.Setup(x => x.AddIssue(It.IsAny<DTWebApi.Issue>(), It.IsAny<ExecutionContextLogOptions>()))
            .Callback((DTWebApi.Issue issue, ExecutionContextLogOptions logOptions) =>
            {
                var useOverride = logOptions.WriteToLog && !string.IsNullOrEmpty(logOptions.LogMessageOverride);
                var loggedMessage = useOverride ? logOptions.LogMessageOverride : issue.Message;
                _issues.Add(Tuple.Create(issue, loggedMessage));
            });

        return testHost;
    }

    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public void OutputWarningMessageOnTimeElapsed()
    {
        var fakeTimer = new MockTimer();
        double tenMinutesMs = TimeSpan.FromMinutes(10).TotalMilliseconds;

        using (Setup())
        using (var stallManager = new StallManager(_executionContext.Object, tenMinutesMs, fakeTimer))
        {
            fakeTimer.TimeElapsed();

            Assert.Equal(1, _issues.Count);

            var firstIssue = _issues[0].Item1;
            Assert.Equal(TenMinuteWarning, firstIssue.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, firstIssue.Type);
        }
    }

    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public void ValidateTimerResetOnNewMessage()
    {
        var fakeTimer = new MockTimer();
        double tenMinutesMs = TimeSpan.FromMinutes(10).TotalMilliseconds;

        using (Setup())
        using (var stallManager = new StallManager(_executionContext.Object, tenMinutesMs, fakeTimer))
        {
            // Two silent intervals in a row
            fakeTimer.TimeElapsed();
            fakeTimer.TimeElapsed();

            // Each interval produces its own warning, with the quiet time accumulating
            Assert.Equal(2, _issues.Count);
            Assert.Equal(TenMinuteWarning, _issues[0].Item1.Message);
            Assert.Equal(TwentyMinuteWarning, _issues[1].Item1.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[0].Item1.Type);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[1].Item1.Type);

            // New output restarts the timer without adding a warning
            stallManager.OnDataReceived(null, null);

            Assert.True(fakeTimer._reset);
            Assert.Equal(2, _issues.Count);

            // One more silent interval after the reset
            fakeTimer.TimeElapsed();

            // The quiet time starts over, so the new warning reports a single interval
            Assert.Equal(3, _issues.Count);
            Assert.Equal(TenMinuteWarning, _issues[2].Item1.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[2].Item1.Type);
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user