diff --git a/src/Runner.Common/Constants.cs b/src/Runner.Common/Constants.cs index a9f91223a..9127ca8d5 100644 --- a/src/Runner.Common/Constants.cs +++ b/src/Runner.Common/Constants.cs @@ -257,6 +257,7 @@ namespace GitHub.Runner.Common public static readonly string ForcedActionsNodeVersion = "ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION"; public static readonly string PrintLogToStdout = "ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT"; public static readonly string ActionArchiveCacheDirectory = "ACTIONS_RUNNER_ACTION_ARCHIVE_CACHE"; + public static readonly string ActionsTerminationGracePeriodSeconds = "ACTIONS_RUNNER_TERMINATION_GRACE_PERIOD_SECONDS"; } public static class System diff --git a/src/Runner.Common/HostContext.cs b/src/Runner.Common/HostContext.cs index 73ca108ae..d7f8b80e1 100644 --- a/src/Runner.Common/HostContext.cs +++ b/src/Runner.Common/HostContext.cs @@ -34,7 +34,7 @@ namespace GitHub.Runner.Common T GetService() where T : class, IRunnerService; void SetDefaultCulture(string name); event EventHandler Unloading; - void ShutdownRunner(ShutdownReason reason); + void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default); void WritePerfCounter(string counter); void LoadDefaultUserAgents(); @@ -74,6 +74,8 @@ namespace GitHub.Runner.Common private string _perfFile; private RunnerWebProxy _webProxy = new(); private string _hostType = string.Empty; + private ShutdownReason _shutdownReason = ShutdownReason.UserCancelled; + private int _shutdownReasonSet = 0; // disable auth migration by default private readonly ManualResetEventSlim _allowAuthMigration = new ManualResetEventSlim(false); @@ -85,7 +87,7 @@ namespace GitHub.Runner.Common public event EventHandler Unloading; public event EventHandler AuthMigrationChanged; public CancellationToken RunnerShutdownToken => _runnerShutdownTokenSource.Token; - public ShutdownReason RunnerShutdownReason { get; private set; } + public ShutdownReason RunnerShutdownReason => _shutdownReason; public ISecretMasker SecretMasker 
=> _secretMasker;
         public List UserAgents => _userAgents;
         public RunnerWebProxy WebProxy => _webProxy;
@@ -573,12 +575,42 @@ namespace GitHub.Runner.Common
         }
 
 
-        public void ShutdownRunner(ShutdownReason reason)
+        /// <summary>
+        /// Requests runner shutdown, either immediately or after a grace period.
+        /// </summary>
+        /// <param name="reason">Why the runner is stopping; only the first reason ever reported is kept.</param>
+        /// <param name="delay">Grace period before the shutdown token is cancelled; zero or negative cancels immediately.</param>
+        public void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default)
         {
             ArgUtil.NotNull(reason, nameof(reason));
-            _trace.Info($"Runner will be shutdown for {reason.ToString()}");
-            RunnerShutdownReason = reason;
-            _runnerShutdownTokenSource.Cancel();
+            _trace.Info($"Runner will be shutdown for {reason.ToString()} after {delay.TotalSeconds} seconds.");
+            bool firstRequest = Interlocked.CompareExchange(ref _shutdownReasonSet, 1, 0) == 0;
+            if (firstRequest)
+            {
+                // Set the shutdown reason only if it hasn't been set before.
+                _shutdownReason = reason;
+            }
+            else
+            {
+                _trace.Verbose($"Runner shutdown reason already set to {_shutdownReason.ToString()}.");
+            }
+
+            if (delay <= TimeSpan.Zero)
+            {
+                // Compare the TimeSpan itself: no floating-point equality, and a negative delay
+                // can never reach CancelAfter, which rejects it (or, for -1 ms, never fires).
+                _runnerShutdownTokenSource.Cancel();
+            }
+            else if (firstRequest)
+            {
+                _runnerShutdownTokenSource.CancelAfter(delay);
+            }
+            else
+            {
+                // CancelAfter restarts the countdown on every call, so a repeated signal
+                // must not re-arm it and push the deadline further out.
+                _trace.Verbose("Shutdown is already scheduled, keeping the existing grace period.");
+            }
         }
 
         public override void Dispose()
diff --git a/src/Runner.Listener/Runner.cs b/src/Runner.Listener/Runner.cs
index 51df80b24..15acea382 100644
--- a/src/Runner.Listener/Runner.cs
+++ b/src/Runner.Listener/Runner.cs
@@ -37,6 +37,8 @@ namespace GitHub.Runner.Listener
         private readonly object _authMigrationTelemetryLock = new();
         private IRunnerServer _runnerServer;
         private CancellationTokenSource _authMigrationTelemetryTokenSource = new();
+        private bool _runnerExiting = false;
+        private bool _hasTerminationGracePeriod = false;
 
         //
         // Helps avoid excessive calls to Run Service when encountering non-retriable errors from /acquirejob.
@@ -309,6 +311,12 @@ namespace GitHub.Runner.Listener
                     _term.WriteLine("https://docs.github.com/en/actions/hosting-your-own-runners/autoscaling-with-self-hosted-runners#using-ephemeral-runners-for-autoscaling", ConsoleColor.Yellow);
                 }
 
+                if (!string.IsNullOrEmpty(Environment.GetEnvironmentVariable(Constants.Variables.Agent.ActionsTerminationGracePeriodSeconds)))
+                {
+                    _hasTerminationGracePeriod = true;
+                    Trace.Verbose($"Runner has termination grace period set");
+                }
+
                 var cred = store.GetCredentials();
                 if (cred != null &&
                     cred.Scheme == Constants.Configuration.OAuth &&
@@ -339,9 +347,18 @@ namespace GitHub.Runner.Listener
 
         private void Runner_Unloading(object sender, EventArgs e)
         {
+            _runnerExiting = true;
             if ((!_inConfigStage) && (!HostContext.RunnerShutdownToken.IsCancellationRequested))
             {
-                HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
+                // The wait below has to cover the grace period as well; otherwise this handler
+                // returns after ExitOnUnloadTimeout while the delayed cancellation is still pending.
+                TimeSpan shutdownDelay = GetShutdownDelay();
+                HostContext.ShutdownRunner(ShutdownReason.UserCancelled, shutdownDelay);
+                if (shutdownDelay > TimeSpan.Zero && _completedCommand.WaitOne(shutdownDelay))
+                {
+                    return;
+                }
+
                 _completedCommand.WaitOne(Constants.Runner.ExitOnUnloadTimeout);
             }
         }
@@ -349,6 +366,7 @@ namespace GitHub.Runner.Listener
         private void CtrlCHandler(object sender, EventArgs e)
         {
             _term.WriteLine("Exiting...");
+            _runnerExiting = true;
             if (_inConfigStage)
             {
                 HostContext.Dispose();
@@ -371,15 +389,32 @@ namespace GitHub.Runner.Listener
                         reason = ShutdownReason.UserCancelled;
                     }
 
-                    HostContext.ShutdownRunner(reason);
+                    HostContext.ShutdownRunner(reason, GetShutdownDelay());
                 }
                 else
                 {
-                    HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
+                    HostContext.ShutdownRunner(ShutdownReason.UserCancelled, GetShutdownDelay());
                 }
             }
         }
 
+        /// <summary>
+        /// Job status callback used with a termination grace period: once an exit has been
+        /// requested and the reported status is no longer Busy, shutdown is requested again
+        /// without a delay so the runner does not wait for the rest of the grace period.
+        /// </summary>
+        private void HandleJobStatusEvent(object sender, JobStatusEventArgs e)
+        {
+            if (_hasTerminationGracePeriod &&
+                e != null &&
+                e.Status != TaskAgentStatus.Busy &&
+                _runnerExiting)
+            {
+                Trace.Info("Runner is no longer busy, shutting down.");
+                HostContext.ShutdownRunner(ShutdownReason.UserCancelled);
+            }
+        }
+
         private IMessageListener GetMessageListener(RunnerSettings settings)
         {
             if (settings.UseV2Flow)
@@ -430,9 +465,13 @@
namespace GitHub.Runner.Listener
                 bool autoUpdateInProgress = false;
                 Task selfUpdateTask = null;
                 bool runOnceJobReceived = false;
-                jobDispatcher = HostContext.CreateService<IJobDispatcher>();
+                jobDispatcher = HostContext.GetService<IJobDispatcher>();
 
                 jobDispatcher.JobStatus += _listener.OnJobStatus;
+                if (_hasTerminationGracePeriod)
+                {
+                    jobDispatcher.JobStatus += HandleJobStatusEvent;
+                }
 
                 while (!HostContext.RunnerShutdownToken.IsCancellationRequested)
                 {
@@ -703,6 +729,10 @@ namespace GitHub.Runner.Listener
                 {
                     if (jobDispatcher != null)
                     {
+                        if (_hasTerminationGracePeriod)
+                        {
+                            jobDispatcher.JobStatus -= HandleJobStatusEvent;
+                        }
                         jobDispatcher.JobStatus -= _listener.OnJobStatus;
                         await jobDispatcher.ShutdownAsync();
                     }
@@ -810,6 +840,48 @@ namespace GitHub.Runner.Listener
             }
         }
 
+        /// <summary>
+        /// Computes how long shutdown should be postponed so that a running job can finish.
+        /// </summary>
+        /// <returns>
+        /// The grace period read from ACTIONS_RUNNER_TERMINATION_GRACE_PERIOD_SECONDS when the
+        /// feature is enabled, the job dispatcher is busy and the value is a whole number of
+        /// seconds between 1 and 3600; otherwise <see cref="TimeSpan.Zero"/>.
+        /// </returns>
+        private TimeSpan GetShutdownDelay()
+        {
+            const int maxGracePeriodSeconds = 60 * 60; // 1 hour
+
+            if (!_hasTerminationGracePeriod)
+            {
+                return TimeSpan.Zero;
+            }
+
+            var jobDispatcher = HostContext.GetService<IJobDispatcher>();
+            if (!jobDispatcher.Busy)
+            {
+                Trace.Verbose("Runner is not busy, no grace period.");
+                return TimeSpan.Zero;
+            }
+
+            Trace.Info("Runner is busy, checking for grace period.");
+            var delayEnv = Environment.GetEnvironmentVariable(Constants.Variables.Agent.ActionsTerminationGracePeriodSeconds);
+            bool parsed = int.TryParse(
+                delayEnv,
+                System.Globalization.NumberStyles.Integer,
+                System.Globalization.CultureInfo.InvariantCulture,
+                out int delaySeconds);
+            if (!parsed || delaySeconds <= 0 || delaySeconds > maxGracePeriodSeconds)
+            {
+                // Say why there is no grace period instead of silently shutting down at once.
+                Trace.Info($"Ignoring invalid termination grace period '{delayEnv}', expected 1 to {maxGracePeriodSeconds} seconds.");
+                return TimeSpan.Zero;
+            }
+
+            Trace.Info($"Waiting for {delaySeconds} seconds before shutting down.");
+            return TimeSpan.FromSeconds(delaySeconds);
+        }
+
         private void PrintUsage(CommandSettings command)
         {
             string separator;
diff --git a/src/Test/L0/Listener/RunnerL0.cs b/src/Test/L0/Listener/RunnerL0.cs
index 6a4dce372..e1c03d38b 100644
--- a/src/Test/L0/Listener/RunnerL0.cs
+++ b/src/Test/L0/Listener/RunnerL0.cs
@@ -126,7 +126,7 @@ namespace GitHub.Runner.Common.Tests.Listener
                     });
 
-                hc.EnqueueInstance(_jobDispatcher.Object);
+                hc.SetSingleton(_jobDispatcher.Object);
                 _configStore.Setup(x => x.IsServiceConfigured()).Returns(false);
                 //Act
@@
-309,7 +309,7 @@ namespace GitHub.Runner.Common.Tests.Listener }); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); _configStore.Setup(x => x.IsServiceConfigured()).Returns(false); //Act @@ -413,7 +413,7 @@ namespace GitHub.Runner.Common.Tests.Listener }); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); _configStore.Setup(x => x.IsServiceConfigured()).Returns(false); //Act @@ -503,7 +503,7 @@ namespace GitHub.Runner.Common.Tests.Listener }); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); _configStore.Setup(x => x.IsServiceConfigured()).Returns(false); //Act @@ -578,7 +578,7 @@ namespace GitHub.Runner.Common.Tests.Listener hc.SetSingleton(_configStore.Object); hc.SetSingleton(_credentialManager.Object); hc.EnqueueInstance(_acquireJobThrottler.Object); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); runner.Initialize(hc); var settings = new RunnerSettings @@ -679,7 +679,7 @@ namespace GitHub.Runner.Common.Tests.Listener hc.SetSingleton(_credentialManager.Object); hc.EnqueueInstance(_acquireJobThrottler.Object); hc.EnqueueInstance(_actionsRunServer.Object); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); runner.Initialize(hc); var settings = new RunnerSettings @@ -780,7 +780,7 @@ namespace GitHub.Runner.Common.Tests.Listener hc.SetSingleton(_credentialManager.Object); hc.EnqueueInstance(_acquireJobThrottler.Object); hc.EnqueueInstance(_runServer.Object); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); runner.Initialize(hc); var settings = new RunnerSettings @@ -880,7 +880,7 @@ namespace GitHub.Runner.Common.Tests.Listener hc.SetSingleton(_updater.Object); hc.SetSingleton(_credentialManager.Object); hc.EnqueueInstance(_acquireJobThrottler.Object); - hc.EnqueueInstance(_jobDispatcher.Object); + hc.SetSingleton(_jobDispatcher.Object); 
hc.EnqueueInstance(_runServer.Object); hc.EnqueueInstance(_runServer.Object); diff --git a/src/Test/L0/TestHostContext.cs b/src/Test/L0/TestHostContext.cs index c1cf69220..2847b6a7f 100644 --- a/src/Test/L0/TestHostContext.cs +++ b/src/Test/L0/TestHostContext.cs @@ -339,7 +339,7 @@ namespace GitHub.Runner.Common.Tests return _traceManager[name]; } - public void ShutdownRunner(ShutdownReason reason) + public void ShutdownRunner(ShutdownReason reason, TimeSpan delay = default) { ArgUtil.NotNull(reason, nameof(reason)); RunnerShutdownReason = reason;