Compare commits

...

1 Commit

Author SHA1 Message Date
Tingluo Huang
bd127e6a90 Implement termination grace period support for the runner 2025-05-02 00:03:39 -04:00
5 changed files with 96 additions and 19 deletions

View File

@@ -257,6 +257,7 @@ namespace GitHub.Runner.Common
public static readonly string ForcedActionsNodeVersion = "ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION";
public static readonly string PrintLogToStdout = "ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT";
public static readonly string ActionArchiveCacheDirectory = "ACTIONS_RUNNER_ACTION_ARCHIVE_CACHE";
// Name of the environment variable that carries the termination grace period, in seconds.
// The listener treats any non-empty value as "grace period enabled" and parses the number
// only when a shutdown is requested while a job is running.
public static readonly string ActionsTerminationGracePeriodSeconds = "ACTIONS_RUNNER_TERMINATION_GRACE_PERIOD_SECONDS";
}
public static class System

View File

@@ -34,7 +34,7 @@ namespace GitHub.Runner.Common
T GetService<T>() where T : class, IRunnerService;
void SetDefaultCulture(string name);
event EventHandler Unloading;
void ShutdownRunner(ShutdownReason reason);
void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default);
void WritePerfCounter(string counter);
void LoadDefaultUserAgents();
@@ -74,6 +74,8 @@ namespace GitHub.Runner.Common
private string _perfFile;
private RunnerWebProxy _webProxy = new();
private string _hostType = string.Empty;
// Reason reported through RunnerShutdownReason; starts as UserCancelled until ShutdownRunner records one.
private ShutdownReason _shutdownReason = ShutdownReason.UserCancelled;
// 0 = no reason recorded yet, 1 = recorded. Kept as an int (not a bool) because it is flipped
// with Interlocked.CompareExchange in ShutdownRunner so that only the first caller's reason is kept.
private int _shutdownReasonSet = 0;
// disable auth migration by default
private readonly ManualResetEventSlim _allowAuthMigration = new ManualResetEventSlim(false);
@@ -85,7 +87,7 @@ namespace GitHub.Runner.Common
public event EventHandler Unloading;
public event EventHandler<AuthMigrationEventArgs> AuthMigrationChanged;
public CancellationToken RunnerShutdownToken => _runnerShutdownTokenSource.Token;
public ShutdownReason RunnerShutdownReason { get; private set; }
public ShutdownReason RunnerShutdownReason => _shutdownReason;
public ISecretMasker SecretMasker => _secretMasker;
public List<ProductInfoHeaderValue> UserAgents => _userAgents;
public RunnerWebProxy WebProxy => _webProxy;
@@ -573,13 +575,29 @@ namespace GitHub.Runner.Common
}
// Requests runner shutdown by cancelling the runner shutdown token source, either right away
// or after `delay`.
//
//   reason - why the runner is shutting down. Only the first call's reason is stored in
//            _shutdownReason; later calls leave it untouched and just log the existing value.
//   delay  - grace period before the token is cancelled. A zero delay (the default) cancels
//            immediately; any other value is handed to CancelAfter.
//
// NOTE(review): this view appears to show the removed and the added lines of the change side by
// side, which is why the signature and the first trace statement occur twice below — confirm
// against the actual file before editing.
public void ShutdownRunner(ShutdownReason reason)
public void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default)
{
ArgUtil.NotNull(reason, nameof(reason));
_trace.Info($"Runner will be shutdown for {reason.ToString()}");
RunnerShutdownReason = reason;
_trace.Info($"Runner will be shutdown for {reason.ToString()} after {delay.TotalSeconds} seconds.");
// Atomically flip the 0/1 flag; only the caller that observes 0 gets to record its reason.
if (Interlocked.CompareExchange(ref _shutdownReasonSet, 1, 0) == 0)
{
// Set the shutdown reason only if it hasn't been set before.
_shutdownReason = reason;
}
else
{
_trace.Verbose($"Runner shutdown reason already set to {_shutdownReason.ToString()}.");
}
// NOTE(review): only an exactly-zero delay takes the immediate path. Per the .NET documentation,
// CancelAfter throws ArgumentOutOfRangeException for negative delays other than -1 ms, and a
// repeated CancelAfter call resets a still-pending timer — confirm callers never pass a negative
// value and that re-arming the timer on a second shutdown request is intended.
if (delay.TotalSeconds == 0)
{
_runnerShutdownTokenSource.Cancel();
}
else
{
_runnerShutdownTokenSource.CancelAfter(delay);
}
}
public override void Dispose()
{

View File

@@ -37,6 +37,8 @@ namespace GitHub.Runner.Listener
private readonly object _authMigrationTelemetryLock = new();
private IRunnerServer _runnerServer;
private CancellationTokenSource _authMigrationTelemetryTokenSource = new();
// Set to true by Runner_Unloading and CtrlCHandler once an exit has been requested;
// HandleJobStatusEvent reads it to decide whether a job finishing should trigger shutdown.
private bool _runnerExiting = false;
// True when the termination grace period environment variable was non-empty at startup.
// Gates the job-status subscription and the delay computed by GetShutdownDelay.
private bool _hasTerminationGracePeriod = false;
// <summary>
// Helps avoid excessive calls to Run Service when encountering non-retriable errors from /acquirejob.
@@ -309,6 +311,12 @@ namespace GitHub.Runner.Listener
_term.WriteLine("https://docs.github.com/en/actions/hosting-your-own-runners/autoscaling-with-self-hosted-runners#using-ephemeral-runners-for-autoscaling", ConsoleColor.Yellow);
}
if (!string.IsNullOrEmpty(Environment.GetEnvironmentVariable(Constants.Variables.Agent.ActionsTerminationGracePeriodSeconds)))
{
_hasTerminationGracePeriod = true;
Trace.Verbose($"Runner has termination grace period set");
}
var cred = store.GetCredentials();
if (cred != null &&
cred.Scheme == Constants.Configuration.OAuth &&
@@ -339,9 +347,10 @@ namespace GitHub.Runner.Listener
// Event handler (presumably attached to the host context's Unloading event — the subscription is
// not visible here) that marks the runner as exiting and, when not in the configuration stage and
// shutdown has not already been signalled, requests a UserCancelled shutdown and then waits on
// _completedCommand using Constants.Runner.ExitOnUnloadTimeout.
//
// NOTE(review): this view appears to show both the removed and the added ShutdownRunner call;
// the second one passes the grace period from GetShutdownDelay().
private void Runner_Unloading(object sender, EventArgs e)
{
// Recorded before anything else so HandleJobStatusEvent can see that an exit is in progress.
_runnerExiting = true;
if ((!_inConfigStage) && (!HostContext.RunnerShutdownToken.IsCancellationRequested))
{
HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
HostContext.ShutdownRunner(ShutdownReason.UserCancelled, GetShutdownDelay());
_completedCommand.WaitOne(Constants.Runner.ExitOnUnloadTimeout);
}
}
@@ -349,6 +358,7 @@ namespace GitHub.Runner.Listener
private void CtrlCHandler(object sender, EventArgs e)
{
_term.WriteLine("Exiting...");
_runnerExiting = true;
if (_inConfigStage)
{
HostContext.Dispose();
@@ -371,15 +381,27 @@ namespace GitHub.Runner.Listener
reason = ShutdownReason.UserCancelled;
}
HostContext.ShutdownRunner(reason);
HostContext.ShutdownRunner(reason, GetShutdownDelay());
}
else
{
HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
HostContext.ShutdownRunner(ShutdownReason.UserCancelled, GetShutdownDelay());
}
}
}
// Job-status callback used when a termination grace period is configured: once an exit has been
// requested and the reported status is anything other than Busy, shutdown is requested
// immediately (no delay argument) instead of waiting for a pending grace period to elapse.
private void HandleJobStatusEvent(object sender, JobStatusEventArgs e)
{
    // Nothing to do unless the grace-period feature is on and the event carries a payload.
    if (!_hasTerminationGracePeriod || e == null)
    {
        return;
    }

    // Still running a job, or nobody asked the runner to exit: leave things alone.
    if (e.Status == TaskAgentStatus.Busy || !_runnerExiting)
    {
        return;
    }

    Trace.Info("Runner is no longer busy, shutting down.");
    HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
}
private IMessageListener GetMessageListener(RunnerSettings settings)
{
if (settings.UseV2Flow)
@@ -430,9 +452,13 @@ namespace GitHub.Runner.Listener
bool autoUpdateInProgress = false;
Task<bool> selfUpdateTask = null;
bool runOnceJobReceived = false;
jobDispatcher = HostContext.CreateService<IJobDispatcher>();
jobDispatcher = HostContext.GetService<IJobDispatcher>();
jobDispatcher.JobStatus += _listener.OnJobStatus;
if (_hasTerminationGracePeriod)
{
jobDispatcher.JobStatus += HandleJobStatusEvent;
}
while (!HostContext.RunnerShutdownToken.IsCancellationRequested)
{
@@ -703,6 +729,10 @@ namespace GitHub.Runner.Listener
{
if (jobDispatcher != null)
{
if (_hasTerminationGracePeriod)
{
jobDispatcher.JobStatus -= HandleJobStatusEvent;
}
jobDispatcher.JobStatus -= _listener.OnJobStatus;
await jobDispatcher.ShutdownAsync();
}
@@ -810,6 +840,34 @@ namespace GitHub.Runner.Listener
}
}
// Computes how long a requested shutdown should be deferred so that a running job can finish.
//
// Returns the configured grace period only when all of the following hold:
//   - the grace-period feature was enabled at startup (_hasTerminationGracePeriod),
//   - the job dispatcher currently reports Busy,
//   - the environment variable holds a whole number of seconds greater than 0 and less than 3600.
// In every other case TimeSpan.Zero is returned, which callers treat as "shut down now".
private TimeSpan GetShutdownDelay()
{
    // Exclusive upper bound for the grace period: one hour.
    const int maxGracePeriodSeconds = 60 * 60;

    if (!_hasTerminationGracePeriod)
    {
        return TimeSpan.Zero;
    }

    var jobDispatcher = HostContext.GetService<IJobDispatcher>();
    if (!jobDispatcher.Busy)
    {
        Trace.Verbose("Runner is not busy, no grace period.");
        return TimeSpan.Zero;
    }

    Trace.Info("Runner is busy, checking for grace period.");
    var delayEnv = Environment.GetEnvironmentVariable(Constants.Variables.Agent.ActionsTerminationGracePeriodSeconds);
    if (string.IsNullOrEmpty(delayEnv))
    {
        // The variable is re-read here, so it may have been cleared since startup.
        return TimeSpan.Zero;
    }

    // The value is machine-readable configuration, so parse it independently of the current culture.
    bool parsed = int.TryParse(
        delayEnv,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out int delaySeconds);

    if (!parsed || delaySeconds <= 0 || delaySeconds >= maxGracePeriodSeconds)
    {
        // Say why the grace period is not honoured instead of silently shutting down immediately.
        Trace.Info($"Ignoring {Constants.Variables.Agent.ActionsTerminationGracePeriodSeconds} value '{delayEnv}': expected a whole number of seconds greater than 0 and less than {maxGracePeriodSeconds}. Shutting down without a grace period.");
        return TimeSpan.Zero;
    }

    Trace.Info($"Waiting for {delaySeconds} seconds before shutting down.");
    return TimeSpan.FromSeconds(delaySeconds);
}
private void PrintUsage(CommandSettings command)
{
string separator;

View File

@@ -126,7 +126,7 @@ namespace GitHub.Runner.Common.Tests.Listener
});
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
_configStore.Setup(x => x.IsServiceConfigured()).Returns(false);
//Act
@@ -309,7 +309,7 @@ namespace GitHub.Runner.Common.Tests.Listener
});
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
_configStore.Setup(x => x.IsServiceConfigured()).Returns(false);
//Act
@@ -413,7 +413,7 @@ namespace GitHub.Runner.Common.Tests.Listener
});
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
_configStore.Setup(x => x.IsServiceConfigured()).Returns(false);
//Act
@@ -503,7 +503,7 @@ namespace GitHub.Runner.Common.Tests.Listener
});
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
_configStore.Setup(x => x.IsServiceConfigured()).Returns(false);
//Act
@@ -578,7 +578,7 @@ namespace GitHub.Runner.Common.Tests.Listener
hc.SetSingleton<IConfigurationStore>(_configStore.Object);
hc.SetSingleton<ICredentialManager>(_credentialManager.Object);
hc.EnqueueInstance<IErrorThrottler>(_acquireJobThrottler.Object);
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
runner.Initialize(hc);
var settings = new RunnerSettings
@@ -679,7 +679,7 @@ namespace GitHub.Runner.Common.Tests.Listener
hc.SetSingleton<ICredentialManager>(_credentialManager.Object);
hc.EnqueueInstance<IErrorThrottler>(_acquireJobThrottler.Object);
hc.EnqueueInstance<IActionsRunServer>(_actionsRunServer.Object);
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
runner.Initialize(hc);
var settings = new RunnerSettings
@@ -780,7 +780,7 @@ namespace GitHub.Runner.Common.Tests.Listener
hc.SetSingleton<ICredentialManager>(_credentialManager.Object);
hc.EnqueueInstance<IErrorThrottler>(_acquireJobThrottler.Object);
hc.EnqueueInstance<IRunServer>(_runServer.Object);
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
runner.Initialize(hc);
var settings = new RunnerSettings
@@ -880,7 +880,7 @@ namespace GitHub.Runner.Common.Tests.Listener
hc.SetSingleton<ISelfUpdater>(_updater.Object);
hc.SetSingleton<ICredentialManager>(_credentialManager.Object);
hc.EnqueueInstance<IErrorThrottler>(_acquireJobThrottler.Object);
hc.EnqueueInstance<IJobDispatcher>(_jobDispatcher.Object);
hc.SetSingleton<IJobDispatcher>(_jobDispatcher.Object);
hc.EnqueueInstance<IRunServer>(_runServer.Object);
hc.EnqueueInstance<IRunServer>(_runServer.Object);

View File

@@ -339,7 +339,7 @@ namespace GitHub.Runner.Common.Tests
return _traceManager[name];
}
public void ShutdownRunner(ShutdownReason reason)
public void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default)
{
ArgUtil.NotNull(reason, nameof(reason));
RunnerShutdownReason = reason;