Compare commits

..

1 Commits

Author SHA1 Message Date
Ryan van Zeben
4b416898db Add in stall manager 2023-07-24 10:18:51 +00:00
15 changed files with 279 additions and 25 deletions

View File

@@ -1,9 +1,19 @@
## Features
- Add warning to notify about forcing actions to run on node16 instead of node12 (#2678)
## Bugs
- Fixes `if:cancelled()` composite steps not running and normal composite steps not interrupting when the job is cancelled (#2638)
- Fix the bug causing double error reporting fix to remain inactive (#2703)
- Remove job completion from runner listener (#2659)
- Fix double error reporting (#2656)
- Fix a bug with incorrect parsing of image values in a container action (#1873)
- Fix error message reported on non-local action setup (#2668)
- Extend github context with host-workspace (#2517)
- Fixed a bug where a misplaced = character could bypass heredoc-style processing (#2627)
## Misc
- Collect telemetry on GitHub-related HTTP requests (#2691)
- Send environment url to Run Service (#2650)
- Reduce token service and unnecessary calls - send token to redirects (#2660)
- Add 'http://' to http(s)_proxy if there is no protocol (#2663)
- Remove extra result step for job itself (#2620)
_Note: Actions Runner follows a progressive release policy, so the latest release might not be available to your enterprise, organization, or repository yet.
To confirm which version of the Actions Runner you should expect, please view the download instructions for your enterprise, organization, or repository.

View File

@@ -1 +1 @@
2.307.1
<Update to ./src/runnerversion when creating release>

View File

@@ -158,6 +158,7 @@ namespace GitHub.Runner.Common
public static readonly string LogTemplateErrorsAsDebugMessages = "DistributedTask.LogTemplateErrorsAsDebugMessages";
public static readonly string UseContainerPathForTemplate = "DistributedTask.UseContainerPathForTemplate";
public static readonly string AllowRunnerContainerHooks = "DistributedTask.AllowRunnerContainerHooks";
public static readonly string AllowRunnerStallDetect = "DistributedTask.AllowRunnerStallDetect";
}
public static readonly string InternalTelemetryIssueDataKey = "_internal_telemetry";

View File

@@ -83,7 +83,7 @@ namespace GitHub.Runner.Worker
// Initialize
void InitializeJob(Pipelines.AgentJobRequestMessage message, CancellationToken token);
void CancelToken();
IExecutionContext CreateChild(Guid recordId, string displayName, string refName, string scopeName, string contextName, ActionRunStage stage, Dictionary<string, string> intraActionState = null, int? recordOrder = null, IPagingLogger logger = null, bool isEmbedded = false, CancellationTokenSource cancellationTokenSource = null, Guid embeddedId = default(Guid), string siblingScopeName = null, TimeSpan? timeout = null);
IExecutionContext CreateChild(Guid recordId, string displayName, string refName, string scopeName, string contextName, ActionRunStage stage, Dictionary<string, string> intraActionState = null, int? recordOrder = null, IPagingLogger logger = null, bool isEmbedded = false, CancellationTokenSource cancellationTokenSource = null, Guid embeddedId = default(Guid), string siblingScopeName = null);
IExecutionContext CreateEmbeddedChild(string scopeName, string contextName, Guid embeddedId, ActionRunStage stage, Dictionary<string, string> intraActionState = null, string siblingScopeName = null);
// logging
@@ -357,8 +357,7 @@ namespace GitHub.Runner.Worker
bool isEmbedded = false,
CancellationTokenSource cancellationTokenSource = null,
Guid embeddedId = default(Guid),
string siblingScopeName = null,
TimeSpan? timeout = null)
string siblingScopeName = null)
{
Trace.Entering();
@@ -387,12 +386,6 @@ namespace GitHub.Runner.Worker
child.ExpressionFunctions.Add(item);
}
child._cancellationTokenSource = cancellationTokenSource ?? new CancellationTokenSource();
if (timeout != null)
{
// composite steps inherit the timeout from the parent, set by https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepstimeout-minutes
child.SetTimeout(timeout);
}
child.EchoOnActionCommand = EchoOnActionCommand;
if (recordOrder != null)
@@ -432,7 +425,7 @@ namespace GitHub.Runner.Worker
Dictionary<string, string> intraActionState = null,
string siblingScopeName = null)
{
return Root.CreateChild(_record.Id, _record.Name, _record.Id.ToString("N"), scopeName, contextName, stage, logger: _logger, isEmbedded: true, cancellationTokenSource: null, intraActionState: intraActionState, embeddedId: embeddedId, siblingScopeName: siblingScopeName, timeout: GetRemainingTimeout());
return Root.CreateChild(_record.Id, _record.Name, _record.Id.ToString("N"), scopeName, contextName, stage, logger: _logger, isEmbedded: true, cancellationTokenSource: null, intraActionState: intraActionState, embeddedId: embeddedId, siblingScopeName: siblingScopeName);
}
public void Start(string currentOperation = null)

View File

@@ -11,5 +11,10 @@ namespace GitHub.Runner.Worker
var isContainerHooksPathSet = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable(Constants.Hooks.ContainerHooksPath));
return isContainerHookFeatureFlagSet && isContainerHooksPathSet;
}
/// <summary>
/// Reports whether the runner stall-detection feature flag is switched on.
/// </summary>
/// <param name="variables">Variables to read the feature flag from; may be null.</param>
/// <returns>
/// True only when <paramref name="variables"/> is non-null and
/// <c>Constants.Runner.Features.AllowRunnerStallDetect</c> reads as true; otherwise false.
/// </returns>
public static bool IsStallDetectEnabled(Variables variables)
{
    // A null variables object or a null flag value both compare unequal to true and yield false.
    return variables?.GetBoolean(Constants.Runner.Features.AllowRunnerStallDetect) == true;
}
}
}

View File

@@ -421,6 +421,8 @@ namespace GitHub.Runner.Worker.Handlers
{
Trace.Info($"Starting: {step.DisplayName}");
step.ExecutionContext.Debug($"Starting: {step.DisplayName}");
// composite steps inherit the timeout from the parent, set by https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepstimeout-minutes
step.ExecutionContext.SetTimeout(step.ExecutionContext.Parent.GetRemainingTimeout());
await Common.Util.EncodingUtil.SetEncoding(HostContext, Trace, step.ExecutionContext.CancellationToken);

View File

@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@@ -240,9 +240,11 @@ namespace GitHub.Runner.Worker.Handlers
}
else
{
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, container, stallManager))
{
stallManager?.Initialize();
var runExitCode = await dockerManager.DockerRun(ExecutionContext, container, stdoutManager.OnDataReceived, stderrManager.OnDataReceived);
ExecutionContext.Debug($"Docker Action run completed with exit code {runExitCode}");
if (runExitCode != 0)

View File

@@ -159,12 +159,15 @@ namespace GitHub.Runner.Worker.Handlers
ExecutionContext.Global.Variables.Set("Node12ActionsWarnings", StringUtil.ConvertToJson(warningActions));
}
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
stallManager?.Initialize();
// Execute the process. Exit code 0 should always be returned.
// A non-zero exit code indicates infrastructural failure.
// Task failure should be communicated over STDOUT using ## commands.

View File

@@ -26,12 +26,14 @@ namespace GitHub.Runner.Worker.Handlers
private IssueMatcher[] _matchers = Array.Empty<IssueMatcher>();
// Mapping that indicates whether a directory belongs to the workflow repository
private readonly Dictionary<string, string> _directoryMap = new();
private StallManager _stallManager;
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null)
public OutputManager(IExecutionContext executionContext, IActionCommandManager commandManager, ContainerInfo container = null, StallManager stallManager = null)
{
_executionContext = executionContext;
_commandManager = commandManager;
_container = container ?? executionContext.Global.Container;
_stallManager = stallManager;
// Recursion failsafe (test override)
var failsafeString = Environment.GetEnvironmentVariable("RUNNER_TEST_GET_REPOSITORY_PATH_FAILSAFE");
@@ -76,6 +78,10 @@ namespace GitHub.Runner.Worker.Handlers
public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
{
if (_stallManager != null)
{
_stallManager.OnDataReceived(sender, e);
}
var line = e.Data;
// ## commands

View File

@@ -43,11 +43,14 @@ namespace GitHub.Runner.Worker.Handlers
// Make sure only particular task get run as runner plugin.
var runnerPlugin = HostContext.GetService<IRunnerPluginManager>();
using (var outputManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager outputManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
ActionCommandManager.EnablePluginInternalCommand();
try
{
stallManager?.Initialize();
await runnerPlugin.RunPluginActionAsync(ExecutionContext, plugin, Inputs, Environment, RuntimeVariables, outputManager.OnDataReceived);
}
finally

View File

@@ -321,13 +321,15 @@ namespace GitHub.Runner.Worker.Handlers
ExecutionContext.Debug($"{fileName} {arguments}");
Inputs.TryGetValue("standardInInput", out var standardInInput);
using (var stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager))
using (var stderrManager = new OutputManager(ExecutionContext, ActionCommandManager))
StallManager stallManager = FeatureManager.IsStallDetectEnabled(ExecutionContext.Global.Variables) ? new StallManager(ExecutionContext) : null;
using (OutputManager stdoutManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager),
stderrManager = new OutputManager(ExecutionContext, ActionCommandManager, null, stallManager))
{
StepHost.OutputDataReceived += stdoutManager.OnDataReceived;
StepHost.ErrorDataReceived += stderrManager.OnDataReceived;
// Execute
stallManager?.Initialize();
int exitCode = await StepHost.ExecuteAsync(ExecutionContext,
workingDirectory: StepHost.ResolvePathForStepHost(ExecutionContext, workingDirectory),
fileName: fileName,

View File

@@ -0,0 +1,70 @@
using System;
using System.Timers;
using GitHub.Runner.Common;
using GitHub.Runner.Sdk;
namespace GitHub.Runner.Worker.Handlers
{
/// <summary>
/// Minimal timer abstraction consumed by <see cref="StallManager"/> so that the real
/// <see cref="Timer"/> can be swapped for a hand-driven fake in tests.
/// Inherits <see cref="IDisposable"/> (rather than re-declaring <c>Dispose</c>) so
/// implementations can be used with <c>using</c> through this interface.
/// </summary>
[ServiceLocator(Default = typeof(TimerAdapter))]
public interface ITimer : IDisposable
{
    /// <summary>Time between <see cref="Elapsed"/> events, in milliseconds.</summary>
    double Interval { get; set; }

    /// <summary>
    /// Whether <see cref="Elapsed"/> is raised repeatedly (true) or only once (false).
    /// </summary>
    bool AutoReset { get; set; }

    /// <summary>Raised each time the interval elapses.</summary>
    event ElapsedEventHandler Elapsed;

    /// <summary>Starts raising <see cref="Elapsed"/> events.</summary>
    void Start();

    /// <summary>Stops raising <see cref="Elapsed"/> events.</summary>
    void Stop();
}
/// <summary>
/// <see cref="ITimer"/> implementation backed by <see cref="Timer"/>.
/// It adds no members of its own: every <see cref="ITimer"/> member is satisfied by the
/// inherited <see cref="Timer"/> implementation.
/// </summary>
public class TimerAdapter : Timer, ITimer
{
}
/// <summary>
/// Watches a running step for output and emits a warning on the execution context each time
/// a full interval passes without any output being reported through
/// <see cref="OnDataReceived"/>.
/// </summary>
/// <remarks>
/// <see cref="OnDataReceived"/> may be called from several output callbacks sharing one
/// instance, while the timer raises <c>Elapsed</c> on its own thread, so all mutable state is
/// guarded by a private lock.
/// </remarks>
public sealed class StallManager : IDisposable
{
    /// <summary>Interval used when the caller does not supply one (30 minutes).</summary>
    public static TimeSpan DefaultStallInterval = TimeSpan.FromMinutes(30);

    private readonly IExecutionContext _executionContext;
    private readonly double _interval;
    private readonly ITimer _timer;

    // Guards the counter, the disposed flag and Stop/Start sequencing on the timer.
    private readonly object _gate = new object();
    private int _intervalsElapsedWhileStalled = 0;
    private bool _disposed = false;

    /// <summary>
    /// Creates a stall manager driven by the supplied timer. The timer is configured here but
    /// not started; call <see cref="Initialize"/> to start it.
    /// </summary>
    /// <param name="executionContext">Context that receives the stall warnings.</param>
    /// <param name="interval">Silence, in milliseconds, tolerated before each warning.</param>
    /// <param name="timer">Timer to drive the warnings; disposed together with this instance.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="executionContext"/> or <paramref name="timer"/> is null.
    /// </exception>
    public StallManager(IExecutionContext executionContext, double interval, ITimer timer)
    {
        _executionContext = executionContext ?? throw new ArgumentNullException(nameof(executionContext));
        _timer = timer ?? throw new ArgumentNullException(nameof(timer));
        _interval = interval;
        _timer.Interval = _interval;
        // The "N intervals elapsed" message only makes sense if the timer keeps firing.
        _timer.AutoReset = true;
        _timer.Elapsed += TriggerWarning;
    }

    /// <summary>Creates a stall manager with the given interval (milliseconds) and a <see cref="TimerAdapter"/>.</summary>
    public StallManager(IExecutionContext executionContext, double interval) : this(executionContext, interval, new TimerAdapter()) { }

    /// <summary>Creates a stall manager using <see cref="DefaultStallInterval"/> and a <see cref="TimerAdapter"/>.</summary>
    public StallManager(IExecutionContext executionContext) : this(executionContext, StallManager.DefaultStallInterval.TotalMilliseconds) { }

    /// <summary>
    /// Starts the stall timer by treating "now" as the moment of the last output.
    /// </summary>
    public void Initialize()
    {
        this.OnDataReceived(null, null);
    }

    /// <summary>
    /// Detaches from the timer, stops it and disposes it. Safe to call more than once;
    /// never throws.
    /// </summary>
    public void Dispose()
    {
        lock (_gate)
        {
            if (_disposed)
            {
                return;
            }
            _disposed = true;

            try
            {
                _timer.Elapsed -= TriggerWarning;
                _timer.Stop();
                _timer.Dispose();
            }
            catch (Exception)
            {
                // Best effort: Dispose must not throw, and there is nothing useful to do
                // if the timer refuses to shut down.
            }
        }
    }

    /// <summary>
    /// Records that output was seen: clears the stalled-interval count and restarts the timer
    /// so the next warning is a full interval away. Both arguments are ignored.
    /// Does nothing once the instance has been disposed.
    /// </summary>
    public void OnDataReceived(object sender, ProcessDataReceivedEventArgs e)
    {
        lock (_gate)
        {
            if (_disposed)
            {
                return;
            }
            _intervalsElapsedWhileStalled = 0;
            _timer.Stop();
            _timer.Start();
        }
    }

    /// <summary>
    /// Timer callback: counts one more silent interval and reports the total silent time
    /// as a warning on the execution context.
    /// </summary>
    private void TriggerWarning(object source, ElapsedEventArgs e)
    {
        int silentIntervals;
        lock (_gate)
        {
            // An Elapsed event may already be queued when the timer is stopped or disposed.
            if (_disposed)
            {
                return;
            }
            silentIntervals = ++_intervalsElapsedWhileStalled;
        }

        // Report outside the lock so a slow logger cannot block output callbacks.
        _executionContext.Warning($"No output has been detected in the last {TimeSpan.FromMilliseconds(silentIntervals * _interval).TotalMinutes} minutes and the process has not yet exited. This step may have stalled and might require some investigation.");
    }
}
}

View File

@@ -1014,7 +1014,8 @@ namespace GitHub.Runner.Common.Tests.Worker
return false;
});
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer);
StallManager stallManager = new StallManager(_executionContext.Object);
_outputManager = new OutputManager(_executionContext.Object, _commandManager.Object, stepContainer, stallManager);
return hostContext;
}

View File

@@ -0,0 +1,156 @@
using System;
using System.Timers;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using GitHub.Runner.Worker;
using GitHub.Runner.Worker.Container;
using GitHub.Runner.Worker.Handlers;
using Moq;
using Xunit;
using DTWebApi = GitHub.DistributedTask.WebApi;
using GitHub.Runner.Common.Util;
using GitHub.DistributedTask.WebApi;
using System.Diagnostics;
namespace GitHub.Runner.Common.Tests.Worker
{
/// <summary>
/// Hand-driven <see cref="ITimer"/> test double. It never fires on its own: tests call
/// <see cref="TimeElapsed"/> to simulate one interval passing, and inspect the public
/// flags to see how the timer was started, stopped and restarted.
/// </summary>
public class MockTimer : ITimer
{
    // True after Start(), false again after Stop().
    public bool _started = false;
    // True after Stop(), cleared by the next Start().
    public bool _stopped = false;
    // True when Start() followed a Stop(), i.e. the timer was restarted.
    public bool _reset = false;

    public double Interval { get; set; }
    public event ElapsedEventHandler Elapsed;
    public bool AutoReset { get; set; }

    public MockTimer()
    {
        Interval = 1;
    }

    public void Dispose() { }

    /// <summary>Marks the timer as started; a start that follows a stop counts as a reset.</summary>
    public void Start()
    {
        _started = true;
        if (_stopped)
        {
            _stopped = false;
            _reset = true;
        }
    }

    /// <summary>Marks the timer as stopped and clears the started/reset flags.</summary>
    public void Stop()
    {
        _reset = false;
        _started = false;
        _stopped = true;
    }

    /// <summary>
    /// Simulates one interval elapsing by raising <see cref="Elapsed"/> synchronously.
    /// Does nothing when no handler is subscribed.
    /// </summary>
    public void TimeElapsed()
    {
        // No event args are fabricated: a plain EventArgs cannot be converted to
        // ElapsedEventArgs, so handlers receive null for the args.
        Elapsed?.Invoke(this, null);
    }
}
/// <summary>
/// L0 tests for <see cref="StallManager"/>. A <see cref="MockTimer"/> stands in for the real
/// timer so intervals can be "elapsed" on demand, and issues added to the mocked execution
/// context are captured in <see cref="_issues"/> for assertions.
/// </summary>
public sealed class StallManagerL0
{
    private Mock<IExecutionContext> _executionContext;
    private List<Tuple<DTWebApi.Issue, string>> _issues;
    private Variables _variables;

    // Warning text expected after the given number of silent minutes.
    private static string StallWarning(int minutes) =>
        $"No output has been detected in the last {minutes} minutes and the process has not yet exited. This step may have stalled and might require some investigation.";

    /// <summary>
    /// Builds the host context and a mocked execution context whose AddIssue calls are
    /// recorded in <see cref="_issues"/>.
    /// </summary>
    private TestHostContext Setup(
        [CallerMemberName] string name = "",
        ContainerInfo jobContainer = null)
    {
        var hostContext = new TestHostContext(this, name);
        _executionContext = new Mock<IExecutionContext>();
        _issues = new List<Tuple<DTWebApi.Issue, string>>();

        // Turn on the stall-detect feature flag.
        _variables = new Variables(hostContext, new Dictionary<string, VariableValue>
        {
            { "DistributedTask.AllowRunnerStallDetect", new VariableValue("true", true) },
        });

        _executionContext.Setup(x => x.Global)
            .Returns(new GlobalContext
            {
                Container = jobContainer,
                Variables = _variables,
                WriteDebug = true,
            });

        _executionContext.Setup(x => x.AddIssue(It.IsAny<DTWebApi.Issue>(), It.IsAny<ExecutionContextLogOptions>()))
            .Callback((DTWebApi.Issue issue, ExecutionContextLogOptions logOptions) =>
            {
                // Record the message as it would be logged: the override wins when present.
                var resolvedMessage = issue.Message;
                if (logOptions.WriteToLog && !string.IsNullOrEmpty(logOptions.LogMessageOverride))
                {
                    resolvedMessage = logOptions.LogMessageOverride;
                }
                _issues.Add(Tuple.Create(issue, resolvedMessage));
            });

        return hostContext;
    }

    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public void OutputWarningMessageOnTimeElapsed()
    {
        MockTimer timer = new MockTimer();
        using (Setup())
        using (StallManager manager = new StallManager(_executionContext.Object, TimeSpan.FromMinutes(10).TotalMilliseconds, timer))
        {
            timer.TimeElapsed();

            // Exactly one warning, reporting one 10-minute interval of silence.
            var recorded = Assert.Single(_issues);
            Assert.Equal(StallWarning(10), recorded.Item1.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, recorded.Item1.Type);
        }
    }

    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public void ValidateTimerResetOnNewMessage()
    {
        MockTimer timer = new MockTimer();
        using (Setup())
        using (StallManager manager = new StallManager(_executionContext.Object, TimeSpan.FromMinutes(10).TotalMilliseconds, timer))
        {
            // Two silent intervals in a row...
            timer.TimeElapsed();
            timer.TimeElapsed();

            // ...produce two warnings with an accumulating duration.
            Assert.Equal(2, _issues.Count);
            Assert.Equal(StallWarning(10), _issues[0].Item1.Message);
            Assert.Equal(StallWarning(20), _issues[1].Item1.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[0].Item1.Type);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[1].Item1.Type);

            // New output restarts the timer without adding a warning.
            manager.OnDataReceived(null, null);
            Assert.True(timer._reset);
            Assert.Equal(2, _issues.Count);

            // The next silent interval starts counting from scratch.
            timer.TimeElapsed();
            Assert.Equal(3, _issues.Count);
            Assert.Equal(StallWarning(10), _issues[2].Item1.Message);
            Assert.Equal(DTWebApi.IssueType.Warning, _issues[2].Item1.Type);
        }
    }
}
}

View File

@@ -1 +1 @@
2.307.1
2.306.0