mirror of
https://github.com/actions/runner.git
synced 2025-12-11 12:57:05 +00:00
Backoff to avoid excessive retries to Run Service in a duration (#3354)
This commit is contained in:
44
src/Runner.Listener/ErrorThrottler.cs
Normal file
44
src/Runner.Listener/ErrorThrottler.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using GitHub.Runner.Common;
|
||||
using GitHub.Services.Common;
|
||||
|
||||
namespace GitHub.Runner.Listener
|
||||
{
|
||||
/// <summary>
/// Tracks consecutive errors and slows the caller down between attempts.
/// The default implementation registered here is <see cref="ErrorThrottler"/>.
/// </summary>
[ServiceLocator(Default = typeof(ErrorThrottler))]
public interface IErrorThrottler : IRunnerService
{
    /// <summary>
    /// Records one more error and, depending on the implementation, waits before returning.
    /// </summary>
    /// <param name="token">Token used to cancel the wait.</param>
    /// <returns>A task that completes once any wait is over.</returns>
    Task IncrementAndWaitAsync(CancellationToken token);

    /// <summary>
    /// Forgets the errors recorded so far; call after a successful attempt.
    /// </summary>
    void Reset();
}
|
||||
|
||||
/// <summary>
/// Default <see cref="IErrorThrottler"/>. Counts errors since the last <see cref="Reset"/> and,
/// from the second one onward, delays the caller by a backoff obtained from
/// <c>BackoffTimerHelper.GetExponentialBackoff</c>.
/// </summary>
public sealed class ErrorThrottler : RunnerService, IErrorThrottler
{
    // Lower bound, upper bound and delta passed to BackoffTimerHelper.GetExponentialBackoff.
    internal static readonly TimeSpan MinBackoff = TimeSpan.FromSeconds(1);
    internal static readonly TimeSpan MaxBackoff = TimeSpan.FromMinutes(1);
    internal static readonly TimeSpan BackoffCoefficient = TimeSpan.FromSeconds(1);

    // Errors recorded since the last Reset().
    private int _count;

    /// <summary>
    /// Sets the recorded error count back to zero.
    /// </summary>
    public void Reset() => _count = 0;

    /// <summary>
    /// Records one more error. The first error after a reset returns without waiting;
    /// every later one logs a warning and awaits a backoff delay.
    /// </summary>
    /// <param name="token">Token forwarded to the delay so the wait can be cancelled.</param>
    /// <returns>A task that completes when the delay (if any) has elapsed.</returns>
    public async Task IncrementAndWaitAsync(CancellationToken token)
    {
        _count++;

        if (_count >= 2)
        {
            // The second error maps to attempt 0, the third to attempt 1, and so on.
            TimeSpan backoff = BackoffTimerHelper.GetExponentialBackoff(
                attempt: _count - 2,
                minBackoff: MinBackoff,
                maxBackoff: MaxBackoff,
                deltaBackoff: BackoffCoefficient);

            Trace.Warning($"Back off {backoff.TotalSeconds} seconds before next attempt. Current consecutive error count: {_count}");
            await HostContext.Delay(backoff, token);
        }
    }
}
|
||||
}
|
||||
@@ -32,10 +32,25 @@ namespace GitHub.Runner.Listener
|
||||
private bool _inConfigStage;
|
||||
private ManualResetEvent _completedCommand = new(false);
|
||||
|
||||
// <summary>
|
||||
// Helps avoid excessive calls to Run Service when encountering non-retriable errors from /acquirejob.
|
||||
// Normally we rely on the HTTP clients to back off between retry attempts. However, acquiring a job
|
||||
// involves calls to both Run Service and Broker. And Run Service and Broker communicate with each other
|
||||
// in an async fashion.
|
||||
//
|
||||
// When Run Service encounters a non-retriable error, it sends an async message to Broker. The runner will,
|
||||
// however, immediately call Broker to get the next message. If the async event from Run Service to Broker
|
||||
// has not yet been processed, the next message from Broker may be the same job message.
|
||||
//
|
||||
// The error throttler helps us back off when encountering successive, non-retriable errors from /acquirejob.
|
||||
// </summary>
|
||||
private IErrorThrottler _acquireJobThrottler;
|
||||
|
||||
/// <summary>
/// Runs the base initialization, then resolves the terminal service and the
/// error throttler used around job acquisition.
/// </summary>
/// <param name="hostContext">Host context passed through to the base class.</param>
public override void Initialize(IHostContext hostContext)
|
||||
{
|
||||
base.Initialize(hostContext);
|
||||
_term = HostContext.GetService<ITerminal>();
|
||||
// Obtained via CreateService rather than GetService as above; presumably so this
// throttler's error count is not shared with other consumers. TODO confirm against IHostContext.
_acquireJobThrottler = HostContext.CreateService<IErrorThrottler>();
|
||||
}
|
||||
|
||||
public async Task<int> ExecuteCommand(CommandSettings command)
|
||||
@@ -565,13 +580,16 @@ namespace GitHub.Runner.Listener
|
||||
await runServer.ConnectAsync(new Uri(messageRef.RunServiceUrl), creds);
|
||||
try
|
||||
{
|
||||
jobRequestMessage =
|
||||
await runServer.GetJobMessageAsync(messageRef.RunnerRequestId,
|
||||
messageQueueLoopTokenSource.Token);
|
||||
jobRequestMessage = await runServer.GetJobMessageAsync(messageRef.RunnerRequestId, messageQueueLoopTokenSource.Token);
|
||||
_acquireJobThrottler.Reset();
|
||||
}
|
||||
catch (TaskOrchestrationJobAlreadyAcquiredException)
|
||||
catch (Exception ex) when (
|
||||
ex is TaskOrchestrationJobNotFoundException || // HTTP status 404
|
||||
ex is TaskOrchestrationJobAlreadyAcquiredException || // HTTP status 409
|
||||
ex is TaskOrchestrationJobUnprocessableException) // HTTP status 422
|
||||
{
|
||||
Trace.Info("Job is already acquired, skip this message.");
|
||||
Trace.Info($"Skipping message Job. {ex.Message}");
|
||||
await _acquireJobThrottler.IncrementAndWaitAsync(messageQueueLoopTokenSource.Token);
|
||||
continue;
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
Reference in New Issue
Block a user