Switch to use token service instead of SPS for exchanging oauth token. (#325)

* Gracefully switch the runner to use Token Service instead of SPS.

* PR feedback.

* feedback2

* report error.
This commit is contained in:
Tingluo Huang
2020-03-04 21:40:58 -05:00
committed by GitHub
parent 0cba42590f
commit 6bec1e3bb8
13 changed files with 1938 additions and 305 deletions

View File

@@ -13,7 +13,10 @@ using System.Diagnostics;
using System.Runtime.InteropServices;
using GitHub.Runner.Common;
using GitHub.Runner.Sdk;
using GitHub.Services.WebApi;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Test")]
namespace GitHub.Runner.Listener
{
[ServiceLocator(Default = typeof(MessageListener))]
@@ -32,18 +35,30 @@ namespace GitHub.Runner.Listener
private ITerminal _term;
private IRunnerServer _runnerServer;
private TaskAgentSession _session;
private ICredentialManager _credMgr;
private IConfigurationStore _configStore;
private TimeSpan _getNextMessageRetryInterval;
private readonly TimeSpan _sessionCreationRetryInterval = TimeSpan.FromSeconds(30);
private readonly TimeSpan _sessionConflictRetryLimit = TimeSpan.FromMinutes(4);
private readonly TimeSpan _clockSkewRetryLimit = TimeSpan.FromMinutes(30);
private readonly Dictionary<string, int> _sessionCreationExceptionTracker = new Dictionary<string, int>();
// Whether load credentials from .credentials_migrated file
internal bool _useMigratedCredentials;
// need to check auth url if there is only .credentials and auth schema is OAuth
internal bool _needToCheckAuthorizationUrlUpdate;
internal Task<VssCredentials> _authorizationUrlMigrationBackgroundTask;
internal Task _authorizationUrlRollbackReattemptDelayBackgroundTask;
public override void Initialize(IHostContext hostContext)
{
base.Initialize(hostContext);
_term = HostContext.GetService<ITerminal>();
_runnerServer = HostContext.GetService<IRunnerServer>();
_credMgr = HostContext.GetService<ICredentialManager>();
_configStore = HostContext.GetService<IConfigurationStore>();
}
public async Task<Boolean> CreateSessionAsync(CancellationToken token)
@@ -58,8 +73,8 @@ namespace GitHub.Runner.Listener
// Create connection.
Trace.Info("Loading Credentials");
var credMgr = HostContext.GetService<ICredentialManager>();
VssCredentials creds = credMgr.LoadCredentials();
_useMigratedCredentials = !StringUtil.ConvertToBoolean(Environment.GetEnvironmentVariable("GITHUB_ACTIONS_RUNNER_SPSAUTHURL"));
VssCredentials creds = _credMgr.LoadCredentials(_useMigratedCredentials);
var agent = new TaskAgentReference
{
@@ -74,6 +89,17 @@ namespace GitHub.Runner.Listener
string errorMessage = string.Empty;
bool encounteringError = false;
var originalCreds = _configStore.GetCredentials();
var migratedCreds = _configStore.GetMigratedCredentials();
if (migratedCreds == null)
{
_useMigratedCredentials = false;
if (originalCreds.Scheme == Constants.Configuration.OAuth)
{
_needToCheckAuthorizationUrlUpdate = true;
}
}
while (true)
{
token.ThrowIfCancellationRequested();
@@ -83,7 +109,7 @@ namespace GitHub.Runner.Listener
Trace.Info("Connecting to the Runner Server...");
await _runnerServer.ConnectAsync(new Uri(serverUrl), creds);
Trace.Info("VssConnection created");
_term.WriteLine();
_term.WriteSuccessMessage("Connected to GitHub");
_term.WriteLine();
@@ -101,6 +127,12 @@ namespace GitHub.Runner.Listener
encounteringError = false;
}
if (_needToCheckAuthorizationUrlUpdate)
{
// start background task try to get new authorization url
_authorizationUrlMigrationBackgroundTask = GetNewOAuthAuthorizationSetting(token);
}
return true;
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
@@ -120,8 +152,21 @@ namespace GitHub.Runner.Listener
if (!IsSessionCreationExceptionRetriable(ex))
{
_term.WriteError($"Failed to create session. {ex.Message}");
return false;
if (_useMigratedCredentials)
{
// migrated credentials might cause lose permission during permission check,
// we will force to use original credential and try again
_useMigratedCredentials = false;
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
creds = _credMgr.LoadCredentials(false);
Trace.Error("Fallback to original credentials and try again.");
}
else
{
_term.WriteError($"Failed to create session. {ex.Message}");
return false;
}
}
if (!encounteringError) //print the message only on the first error
@@ -182,6 +227,51 @@ namespace GitHub.Runner.Listener
encounteringError = false;
continuousError = 0;
}
if (_needToCheckAuthorizationUrlUpdate &&
_authorizationUrlMigrationBackgroundTask?.IsCompleted == true)
{
if (HostContext.GetService<IJobDispatcher>().Busy ||
HostContext.GetService<ISelfUpdater>().Busy)
{
Trace.Info("Job or runner updates in progress, update credentials next time.");
}
else
{
try
{
var newCred = await _authorizationUrlMigrationBackgroundTask;
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), newCred);
Trace.Info("Updated connection to use migrated credential for next GetMessage call.");
_useMigratedCredentials = true;
_authorizationUrlMigrationBackgroundTask = null;
_needToCheckAuthorizationUrlUpdate = false;
}
catch (Exception ex)
{
Trace.Error("Fail to refresh connection with new authorization url.");
Trace.Error(ex);
}
}
}
if (_authorizationUrlRollbackReattemptDelayBackgroundTask?.IsCompleted == true)
{
try
{
// we rolled back to use original creds about 2 days before, now it's a good time to try migrated creds again.
Trace.Info("Re-attempt to use migrated credential");
var migratedCreds = _credMgr.LoadCredentials();
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedCreds);
_useMigratedCredentials = true;
_authorizationUrlRollbackReattemptDelayBackgroundTask = null;
}
catch (Exception ex)
{
Trace.Error("Fail to refresh connection with new authorization url on rollback reattempt.");
Trace.Error(ex);
}
}
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
{
@@ -205,7 +295,21 @@ namespace GitHub.Runner.Listener
}
else if (!IsGetNextMessageExceptionRetriable(ex))
{
throw;
if (_useMigratedCredentials)
{
// migrated credentials might cause lose permission during permission check,
// we will force to use original credential and try again
_useMigratedCredentials = false;
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
var originalCreds = _credMgr.LoadCredentials(false);
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), originalCreds);
Trace.Error("Fallback to original credentials and try again.");
}
else
{
throw;
}
}
else
{
@@ -397,5 +501,80 @@ namespace GitHub.Runner.Listener
return true;
}
}
private async Task<VssCredentials> GetNewOAuthAuthorizationSetting(CancellationToken token)
{
Trace.Info("Start checking oauth authorization url update.");
while (true)
{
var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromMinutes(30), TimeSpan.FromMinutes(45));
await HostContext.Delay(backoff, token);
try
{
var migratedAuthorizationUrl = await _runnerServer.GetRunnerAuthUrlAsync(_settings.PoolId, _settings.AgentId);
if (!string.IsNullOrEmpty(migratedAuthorizationUrl))
{
var credData = _configStore.GetCredentials();
var clientId = credData.Data.GetValueOrDefault("clientId", null);
var currentAuthorizationUrl = credData.Data.GetValueOrDefault("authorizationUrl", null);
Trace.Info($"Current authorization url: {currentAuthorizationUrl}, new authorization url: {migratedAuthorizationUrl}");
if (string.Equals(currentAuthorizationUrl, migratedAuthorizationUrl, StringComparison.OrdinalIgnoreCase))
{
// We don't need to update credentials.
Trace.Info("No needs to update authorization url");
await Task.Delay(TimeSpan.FromMilliseconds(-1), token);
}
var keyManager = HostContext.GetService<IRSAKeyManager>();
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
var migratedClientCredential = new VssOAuthJwtBearerClientCredential(clientId, migratedAuthorizationUrl, signingCredentials);
var migratedRunnerCredential = new VssOAuthCredential(new Uri(migratedAuthorizationUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, migratedClientCredential);
Trace.Info("Try connect service with Token Service OAuth endpoint.");
var runnerServer = HostContext.CreateService<IRunnerServer>();
await runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedRunnerCredential);
await runnerServer.GetAgentPoolsAsync();
Trace.Info($"Successfully connected service with new authorization url.");
var migratedCredData = new CredentialData
{
Scheme = Constants.Configuration.OAuth,
Data =
{
{ "clientId", clientId },
{ "authorizationUrl", migratedAuthorizationUrl },
{ "oauthEndpointUrl", migratedAuthorizationUrl },
},
};
_configStore.SaveMigratedCredential(migratedCredData);
return migratedRunnerCredential;
}
else
{
Trace.Verbose("No authorization url updates");
}
}
catch (Exception ex)
{
Trace.Error("Fail to get/test new authorization url.");
Trace.Error(ex);
try
{
await _runnerServer.ReportRunnerAuthUrlErrorAsync(_settings.PoolId, _settings.AgentId, ex.ToString());
}
catch (Exception e)
{
// best effort
Trace.Error("Fail to report the migration error");
Trace.Error(e);
}
}
}
}
}
}