Support auth migration using authUrlV2 in Runner/MessageListener. (#3787)

This commit is contained in:
Tingluo Huang
2025-04-10 12:58:33 -04:00
committed by GitHub
parent f1b5b5bd5c
commit d5ccbd10d1
6 changed files with 1204 additions and 12 deletions

View File

@@ -26,6 +26,7 @@ namespace GitHub.Runner.Listener
private TaskAgentStatus runnerStatus = TaskAgentStatus.Online;
private CancellationTokenSource _getMessagesTokenSource;
private VssCredentials _creds;
private VssCredentials _credsV2;
private TaskAgentSession _session;
private IRunnerServer _runnerServer;
private IBrokerServer _brokerServer;
@@ -35,7 +36,8 @@ namespace GitHub.Runner.Listener
private readonly TimeSpan _sessionCreationRetryInterval = TimeSpan.FromSeconds(30);
private readonly TimeSpan _sessionConflictRetryLimit = TimeSpan.FromMinutes(4);
private readonly TimeSpan _clockSkewRetryLimit = TimeSpan.FromMinutes(30);
private bool _needRefreshCredsV2 = false;
private bool _handlerInitialized = false;
public override void Initialize(IHostContext hostContext)
{
@@ -88,7 +90,8 @@ namespace GitHub.Runner.Listener
try
{
Trace.Info("Connecting to the Broker Server...");
await _brokerServer.ConnectAsync(new Uri(serverUrlV2), _creds);
_credsV2 = _credMgr.LoadCredentials(allowAuthUrlV2: true);
await _brokerServer.ConnectAsync(new Uri(serverUrlV2), _credsV2);
Trace.Info("VssConnection created");
if (!string.IsNullOrEmpty(serverUrl) &&
@@ -113,6 +116,13 @@ namespace GitHub.Runner.Listener
encounteringError = false;
}
if (!_handlerInitialized)
{
// Register event handler for auth migration state change
HostContext.AuthMigrationChanged += HandleAuthMigrationChanged;
_handlerInitialized = true;
}
return CreateSessionResult.Success;
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
@@ -131,7 +141,7 @@ namespace GitHub.Runner.Listener
Trace.Error("Catch exception during create session.");
Trace.Error(ex);
if (ex is VssOAuthTokenRequestException vssOAuthEx && _creds.Federated is VssOAuthCredential vssOAuthCred)
if (ex is VssOAuthTokenRequestException vssOAuthEx && _credsV2.Federated is VssOAuthCredential vssOAuthCred)
{
// "invalid_client" means the runner registration has been deleted from the server.
if (string.Equals(vssOAuthEx.Error, "invalid_client", StringComparison.OrdinalIgnoreCase))
@@ -162,6 +172,12 @@ namespace GitHub.Runner.Listener
return CreateSessionResult.Failure;
}
if (HostContext.AllowAuthMigration)
{
Trace.Info("Disable migration mode for 60 minutes.");
HostContext.DeferAuthMigration(TimeSpan.FromMinutes(60), $"Session creation failed with exception: {ex}");
}
if (!encounteringError) //print the message only on the first error
{
_term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
@@ -178,6 +194,11 @@ namespace GitHub.Runner.Listener
{
if (_session != null && _session.SessionId != Guid.Empty)
{
if (_handlerInitialized)
{
HostContext.AuthMigrationChanged -= HandleAuthMigrationChanged;
}
if (!_accessTokenRevoked)
{
using (var ts = new CancellationTokenSource(TimeSpan.FromSeconds(30)))
@@ -220,6 +241,13 @@ namespace GitHub.Runner.Listener
_getMessagesTokenSource = CancellationTokenSource.CreateLinkedTokenSource(token);
try
{
if (_needRefreshCredsV2)
{
Trace.Info("Refreshing broker connection.");
await RefreshBrokerConnectionAsync();
_needRefreshCredsV2 = false;
}
message = await _brokerServer.GetRunnerMessageAsync(_session.SessionId,
runnerStatus,
BuildConstants.RunnerPackage.Version,
@@ -299,6 +327,12 @@ namespace GitHub.Runner.Listener
encounteringError = true;
}
if (HostContext.AllowAuthMigration)
{
Trace.Info("Disable migration mode for 60 minutes.");
HostContext.DeferAuthMigration(TimeSpan.FromMinutes(60), $"Get next message failed with exception: {ex}");
}
// re-create VssConnection before next retry
await RefreshBrokerConnectionAsync();
@@ -434,9 +468,15 @@ namespace GitHub.Runner.Listener
private async Task RefreshBrokerConnectionAsync()
{
Trace.Info("Reload credentials.");
_creds = _credMgr.LoadCredentials(allowAuthUrlV2: false); // TODO: change to `true` in the next PR.
await _brokerServer.ConnectAsync(new Uri(_settings.ServerUrlV2), _creds);
_credsV2 = _credMgr.LoadCredentials(allowAuthUrlV2: true);
await _brokerServer.ConnectAsync(new Uri(_settings.ServerUrlV2), _credsV2);
Trace.Info("Connection to Broker Server recreated.");
}
private void HandleAuthMigrationChanged(object sender, EventArgs e)
{
Trace.Info($"Auth migration changed. Current allow auth migration state: {HostContext.AllowAuthMigration}");
_needRefreshCredsV2 = true;
}
}
}

View File

@@ -55,6 +55,9 @@ namespace GitHub.Runner.Listener
private TaskAgentStatus runnerStatus = TaskAgentStatus.Online;
private CancellationTokenSource _getMessagesTokenSource;
private VssCredentials _creds;
private VssCredentials _credsV2;
private bool _needRefreshCredsV2 = false;
private bool _handlerInitialized = false;
public override void Initialize(IHostContext hostContext)
{
@@ -120,6 +123,13 @@ namespace GitHub.Runner.Listener
encounteringError = false;
}
if (!_handlerInitialized)
{
Trace.Info("Registering AuthMigrationChanged event handler.");
HostContext.AuthMigrationChanged += HandleAuthMigrationChanged;
_handlerInitialized = true;
}
return CreateSessionResult.Success;
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
@@ -185,6 +195,11 @@ namespace GitHub.Runner.Listener
{
if (_session != null && _session.SessionId != Guid.Empty)
{
if (_handlerInitialized)
{
HostContext.AuthMigrationChanged -= HandleAuthMigrationChanged;
}
if (!_accessTokenRevoked)
{
using (var ts = new CancellationTokenSource(TimeSpan.FromSeconds(30)))
@@ -249,7 +264,15 @@ namespace GitHub.Runner.Listener
{
var migrationMessage = JsonUtility.FromString<BrokerMigrationMessage>(message.Body);
await _brokerServer.UpdateConnectionIfNeeded(migrationMessage.BrokerBaseUrl, _creds);
_credsV2 = _credMgr.LoadCredentials(allowAuthUrlV2: true);
await _brokerServer.UpdateConnectionIfNeeded(migrationMessage.BrokerBaseUrl, _credsV2);
if (_needRefreshCredsV2)
{
Trace.Info("Refreshing credentials for V2.");
await _brokerServer.ForceRefreshConnection(_credsV2);
_needRefreshCredsV2 = false;
}
message = await _brokerServer.GetRunnerMessageAsync(_session.SessionId,
runnerStatus,
BuildConstants.RunnerPackage.Version,
@@ -341,6 +364,12 @@ namespace GitHub.Runner.Listener
encounteringError = true;
}
if (HostContext.AllowAuthMigration)
{
Trace.Info("Disable migration mode for 60 minutes.");
HostContext.DeferAuthMigration(TimeSpan.FromMinutes(60), $"Get next message failed with exception: {ex}");
}
// re-create VssConnection before next retry
await _runnerServer.RefreshConnectionAsync(RunnerConnectionType.MessageQueue, TimeSpan.FromSeconds(60));
@@ -401,8 +430,8 @@ namespace GitHub.Runner.Listener
public async Task RefreshListenerTokenAsync()
{
await _runnerServer.RefreshConnectionAsync(RunnerConnectionType.MessageQueue, TimeSpan.FromSeconds(60));
_creds = _credMgr.LoadCredentials(allowAuthUrlV2: false); // TODO: change to `true` in next PR
await _brokerServer.ForceRefreshConnection(_creds);
_credsV2 = _credMgr.LoadCredentials(allowAuthUrlV2: true);
await _brokerServer.ForceRefreshConnection(_credsV2);
}
private TaskAgentMessage DecryptMessage(TaskAgentMessage message)
@@ -533,5 +562,11 @@ namespace GitHub.Runner.Listener
return true;
}
}
private void HandleAuthMigrationChanged(object sender, EventArgs e)
{
Trace.Info($"Auth migration changed. Current allow auth migration state: {HostContext.AllowAuthMigration}");
_needRefreshCredsV2 = true;
}
}
}

View File

@@ -1,4 +1,5 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@@ -31,7 +32,11 @@ namespace GitHub.Runner.Listener
private ITerminal _term;
private bool _inConfigStage;
private ManualResetEvent _completedCommand = new(false);
private readonly ConcurrentQueue<string> _authMigrationTelemetries = new();
private Task _authMigrationTelemetryTask;
private readonly object _authMigrationTelemetryLock = new();
private IRunnerServer _runnerServer;
private CancellationTokenSource _authMigrationTelemetryTokenSource = new();
// <summary>
// Helps avoid excessive calls to Run Service when encountering non-retriable errors from /acquirejob.
@@ -68,6 +73,8 @@ namespace GitHub.Runner.Listener
//register a SIGTERM handler
HostContext.Unloading += Runner_Unloading;
HostContext.AuthMigrationChanged += HandleAuthMigrationChanged;
// TODO Unit test to cover this logic
Trace.Info(nameof(ExecuteCommand));
var configManager = HostContext.GetService<IConfigurationManager>();
@@ -313,6 +320,8 @@ namespace GitHub.Runner.Listener
}
finally
{
_authMigrationTelemetryTokenSource?.Cancel();
HostContext.AuthMigrationChanged -= HandleAuthMigrationChanged;
_term.CancelKeyPress -= CtrlCHandler;
HostContext.Unloading -= Runner_Unloading;
_completedCommand.Set();
@@ -572,18 +581,18 @@ namespace GitHub.Runner.Listener
// Create connection
var credMgr = HostContext.GetService<ICredentialManager>();
var creds = credMgr.LoadCredentials(allowAuthUrlV2: false);
if (string.IsNullOrEmpty(messageRef.RunServiceUrl))
{
var creds = credMgr.LoadCredentials(allowAuthUrlV2: false);
var actionsRunServer = HostContext.CreateService<IActionsRunServer>();
await actionsRunServer.ConnectAsync(new Uri(settings.ServerUrl), creds);
jobRequestMessage = await actionsRunServer.GetJobMessageAsync(messageRef.RunnerRequestId, messageQueueLoopTokenSource.Token);
}
else
{
var credsV2 = credMgr.LoadCredentials(allowAuthUrlV2: true);
var runServer = HostContext.CreateService<IRunServer>();
await runServer.ConnectAsync(new Uri(messageRef.RunServiceUrl), creds);
await runServer.ConnectAsync(new Uri(messageRef.RunServiceUrl), credsV2);
try
{
jobRequestMessage = await runServer.GetJobMessageAsync(messageRef.RunnerRequestId, messageRef.BillingOwnerId, messageQueueLoopTokenSource.Token);
@@ -601,6 +610,13 @@ namespace GitHub.Runner.Listener
catch (Exception ex)
{
Trace.Error($"Caught exception from acquiring job message: {ex}");
if (HostContext.AllowAuthMigration)
{
Trace.Info("Disable migration mode for 60 minutes.");
HostContext.DeferAuthMigration(TimeSpan.FromMinutes(60), $"Acquire job failed with exception: {ex}");
}
continue;
}
}
@@ -718,6 +734,73 @@ namespace GitHub.Runner.Listener
return Constants.Runner.ReturnCode.Success;
}
private void HandleAuthMigrationChanged(object sender, AuthMigrationEventArgs e)
{
Trace.Verbose("Handle AuthMigrationChanged in Runner");
_authMigrationTelemetries.Enqueue($"{DateTime.UtcNow.ToString("O")}: {e.Trace}");
// only start the telemetry reporting task once auth migration is changed (enabled or disabled)
lock (_authMigrationTelemetryLock)
{
if (_authMigrationTelemetryTask == null)
{
_authMigrationTelemetryTask = ReportAuthMigrationTelemetryAsync(_authMigrationTelemetryTokenSource.Token);
}
}
}
private async Task ReportAuthMigrationTelemetryAsync(CancellationToken token)
{
var configManager = HostContext.GetService<IConfigurationManager>();
var runnerSettings = configManager.LoadSettings();
while (!token.IsCancellationRequested)
{
try
{
await HostContext.Delay(TimeSpan.FromSeconds(60), token);
}
catch (TaskCanceledException)
{
// Ignore cancellation
}
Trace.Verbose("Checking for auth migration telemetry to report");
while (_authMigrationTelemetries.TryDequeue(out var telemetry))
{
Trace.Verbose($"Reporting auth migration telemetry: {telemetry}");
if (runnerSettings != null)
{
try
{
using (var tokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(30)))
{
await _runnerServer.UpdateAgentUpdateStateAsync(runnerSettings.PoolId, runnerSettings.AgentId, "RefreshConfig", telemetry, tokenSource.Token);
}
}
catch (Exception ex)
{
Trace.Error("Failed to report auth migration telemetry.");
Trace.Error(ex);
_authMigrationTelemetries.Enqueue(telemetry);
}
}
if (!token.IsCancellationRequested)
{
try
{
await HostContext.Delay(TimeSpan.FromSeconds(10), token);
}
catch (TaskCanceledException)
{
// Ignore cancellation
}
}
}
}
}
private void PrintUsage(CommandSettings command)
{
string separator;