Switch to use token service instead of SPS for exchanging oauth token. (#325)

* Gracefully switch the runner to use Token Service instead of SPS.

* PR feedback.

* feedback2

* report error.
This commit is contained in:
Tingluo Huang
2020-03-04 21:40:58 -05:00
committed by GitHub
parent 0cba42590f
commit 6bec1e3bb8
13 changed files with 1938 additions and 305 deletions

View File

@@ -78,8 +78,10 @@ namespace GitHub.Runner.Common
bool IsServiceConfigured();
bool HasCredentials();
CredentialData GetCredentials();
CredentialData GetMigratedCredentials();
RunnerSettings GetSettings();
void SaveCredential(CredentialData credential);
void SaveMigratedCredential(CredentialData credential);
void SaveSettings(RunnerSettings settings);
void DeleteCredential();
void DeleteSettings();
@@ -90,9 +92,11 @@ namespace GitHub.Runner.Common
private string _binPath;
private string _configFilePath;
private string _credFilePath;
private string _migratedCredFilePath;
private string _serviceConfigFilePath;
private CredentialData _creds;
private CredentialData _migratedCreds;
private RunnerSettings _settings;
public override void Initialize(IHostContext hostContext)
@@ -114,6 +118,9 @@ namespace GitHub.Runner.Common
_credFilePath = hostContext.GetConfigFile(WellKnownConfigFile.Credentials);
Trace.Info("CredFilePath: {0}", _credFilePath);
_migratedCredFilePath = hostContext.GetConfigFile(WellKnownConfigFile.MigratedCredentials);
Trace.Info("MigratedCredFilePath: {0}", _migratedCredFilePath);
_serviceConfigFilePath = hostContext.GetConfigFile(WellKnownConfigFile.Service);
Trace.Info("ServiceConfigFilePath: {0}", _serviceConfigFilePath);
}
@@ -123,7 +130,7 @@ namespace GitHub.Runner.Common
public bool HasCredentials()
{
Trace.Info("HasCredentials()");
bool credsStored = (new FileInfo(_credFilePath)).Exists;
bool credsStored = (new FileInfo(_credFilePath)).Exists || (new FileInfo(_migratedCredFilePath)).Exists;
Trace.Info("stored {0}", credsStored);
return credsStored;
}
@@ -154,6 +161,16 @@ namespace GitHub.Runner.Common
return _creds;
}
public CredentialData GetMigratedCredentials()
{
if (_migratedCreds == null && File.Exists(_migratedCredFilePath))
{
_migratedCreds = IOUtil.LoadObject<CredentialData>(_migratedCredFilePath);
}
return _migratedCreds;
}
public RunnerSettings GetSettings()
{
if (_settings == null)
@@ -188,6 +205,21 @@ namespace GitHub.Runner.Common
File.SetAttributes(_credFilePath, File.GetAttributes(_credFilePath) | FileAttributes.Hidden);
}
public void SaveMigratedCredential(CredentialData credential)
{
Trace.Info("Saving {0} migrated credential @ {1}", credential.Scheme, _migratedCredFilePath);
if (File.Exists(_migratedCredFilePath))
{
// Delete existing credential file first, since the file is hidden and not able to overwrite.
Trace.Info("Delete exist runner migrated credential file.");
IOUtil.DeleteFile(_migratedCredFilePath);
}
IOUtil.SaveObject(credential, _migratedCredFilePath);
Trace.Info("Migrated Credentials Saved.");
File.SetAttributes(_migratedCredFilePath, File.GetAttributes(_migratedCredFilePath) | FileAttributes.Hidden);
}
public void SaveSettings(RunnerSettings settings)
{
Trace.Info("Saving runner settings.");
@@ -206,6 +238,7 @@ namespace GitHub.Runner.Common
public void DeleteCredential()
{
IOUtil.Delete(_credFilePath, default(CancellationToken));
IOUtil.Delete(_migratedCredFilePath, default(CancellationToken));
}
public void DeleteSettings()

View File

@@ -19,6 +19,7 @@ namespace GitHub.Runner.Common
{
Runner,
Credentials,
MigratedCredentials,
RSACredentials,
Service,
CredentialStore,

View File

@@ -281,6 +281,12 @@ namespace GitHub.Runner.Common
".credentials");
break;
case WellKnownConfigFile.MigratedCredentials:
path = Path.Combine(
GetDirectory(WellKnownDirectory.Root),
".credentials_migrated");
break;
case WellKnownConfigFile.RSACredentials:
path = Path.Combine(
GetDirectory(WellKnownDirectory.Root),

View File

@@ -50,6 +50,10 @@ namespace GitHub.Runner.Common
// agent update
Task<TaskAgent> UpdateAgentUpdateStateAsync(int agentPoolId, int agentId, string currentState);
// runner authorization url
Task<string> GetRunnerAuthUrlAsync(int runnerPoolId, int runnerId);
Task ReportRunnerAuthUrlErrorAsync(int runnerPoolId, int runnerId, string error);
}
public sealed class RunnerServer : RunnerService, IRunnerServer
@@ -334,5 +338,20 @@ namespace GitHub.Runner.Common
CheckConnection(RunnerConnectionType.Generic);
return _genericTaskAgentClient.UpdateAgentUpdateStateAsync(agentPoolId, agentId, currentState);
}
//-----------------------------------------------------------------
// Runner Auth Url
//-----------------------------------------------------------------
public Task<string> GetRunnerAuthUrlAsync(int runnerPoolId, int runnerId)
{
CheckConnection(RunnerConnectionType.MessageQueue);
return _messageTaskAgentClient.GetAgentAuthUrlAsync(runnerPoolId, runnerId);
}
public Task ReportRunnerAuthUrlErrorAsync(int runnerPoolId, int runnerId, string error)
{
CheckConnection(RunnerConnectionType.MessageQueue);
return _messageTaskAgentClient.ReportAgentAuthUrlMigrationErrorAsync(runnerPoolId, runnerId, error);
}
}
}

View File

@@ -13,7 +13,7 @@ namespace GitHub.Runner.Listener.Configuration
public interface ICredentialManager : IRunnerService
{
ICredentialProvider GetCredentialProvider(string credType);
VssCredentials LoadCredentials();
VssCredentials LoadCredentials(bool preferMigrated = true);
}
public class CredentialManager : RunnerService, ICredentialManager
@@ -40,7 +40,7 @@ namespace GitHub.Runner.Listener.Configuration
return creds;
}
public VssCredentials LoadCredentials()
public VssCredentials LoadCredentials(bool preferMigrated = true)
{
IConfigurationStore store = HostContext.GetService<IConfigurationStore>();
@@ -50,6 +50,16 @@ namespace GitHub.Runner.Listener.Configuration
}
CredentialData credData = store.GetCredentials();
if (preferMigrated)
{
var migratedCred = store.GetMigratedCredentials();
if (migratedCred != null)
{
credData = migratedCred;
}
}
ICredentialProvider credProv = GetCredentialProvider(credData.Scheme);
credProv.CredentialData = credData;

View File

@@ -1,6 +1,5 @@
using System;
using GitHub.Runner.Common;
using GitHub.Runner.Common.Util;
using GitHub.Runner.Sdk;
using GitHub.Services.Common;
using GitHub.Services.OAuth;
@@ -29,7 +28,7 @@ namespace GitHub.Runner.Listener.Configuration
var authorizationUrl = this.CredentialData.Data.GetValueOrDefault("authorizationUrl", null);
// For back compat with .credential file that doesn't has 'oauthEndpointUrl' section
var oathEndpointUrl = this.CredentialData.Data.GetValueOrDefault("oauthEndpointUrl", authorizationUrl);
var oauthEndpointUrl = this.CredentialData.Data.GetValueOrDefault("oauthEndpointUrl", authorizationUrl);
ArgUtil.NotNullOrEmpty(clientId, nameof(clientId));
ArgUtil.NotNullOrEmpty(authorizationUrl, nameof(authorizationUrl));
@@ -39,7 +38,7 @@ namespace GitHub.Runner.Listener.Configuration
var keyManager = context.GetService<IRSAKeyManager>();
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
var clientCredential = new VssOAuthJwtBearerClientCredential(clientId, authorizationUrl, signingCredentials);
var agentCredential = new VssOAuthCredential(new Uri(oathEndpointUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, clientCredential);
var agentCredential = new VssOAuthCredential(new Uri(oauthEndpointUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, clientCredential);
// Construct a credentials cache with a single OAuth credential for communication. The windows credential
// is explicitly set to null to ensure we never do that negotiation.

View File

@@ -18,6 +18,7 @@ namespace GitHub.Runner.Listener
[ServiceLocator(Default = typeof(JobDispatcher))]
public interface IJobDispatcher : IRunnerService
{
bool Busy { get; }
TaskCompletionSource<bool> RunOnceJobCompleted { get; }
void Run(Pipelines.AgentJobRequestMessage message, bool runOnce = false);
bool Cancel(JobCancelMessage message);
@@ -69,6 +70,8 @@ namespace GitHub.Runner.Listener
public TaskCompletionSource<bool> RunOnceJobCompleted => _runOnceJobCompleted;
public bool Busy { get; private set; }
public void Run(Pipelines.AgentJobRequestMessage jobRequestMessage, bool runOnce = false)
{
Trace.Info($"Job request {jobRequestMessage.RequestId} for plan {jobRequestMessage.Plan.PlanId} job {jobRequestMessage.JobId} received.");
@@ -247,7 +250,7 @@ namespace GitHub.Runner.Listener
Task completedTask = await Task.WhenAny(jobDispatch.WorkerDispatch, Task.Delay(TimeSpan.FromSeconds(45)));
if (completedTask != jobDispatch.WorkerDispatch)
{
// at this point, the job exectuion might encounter some dead lock and even not able to be canclled.
// at this point, the job execution might encounter some dead lock and even not able to be cancelled.
// no need to localize the exception string should never happen.
throw new InvalidOperationException($"Job dispatch process for {jobDispatch.JobId} has encountered unexpected error, the dispatch task is not able to be canceled within 45 seconds.");
}
@@ -295,6 +298,9 @@ namespace GitHub.Runner.Listener
}
private async Task RunAsync(Pipelines.AgentJobRequestMessage message, WorkerDispatcher previousJobDispatch, CancellationToken jobRequestCancellationToken, CancellationToken workerCancelTimeoutKillToken)
{
Busy = true;
try
{
if (previousJobDispatch != null)
{
@@ -595,6 +601,11 @@ namespace GitHub.Runner.Listener
}
}
}
finally
{
Busy = false;
}
}
public async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockToken, TaskCompletionSource<int> firstJobRequestRenewed, CancellationToken token)
{

View File

@@ -13,7 +13,10 @@ using System.Diagnostics;
using System.Runtime.InteropServices;
using GitHub.Runner.Common;
using GitHub.Runner.Sdk;
using GitHub.Services.WebApi;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Test")]
namespace GitHub.Runner.Listener
{
[ServiceLocator(Default = typeof(MessageListener))]
@@ -32,18 +35,30 @@ namespace GitHub.Runner.Listener
private ITerminal _term;
private IRunnerServer _runnerServer;
private TaskAgentSession _session;
private ICredentialManager _credMgr;
private IConfigurationStore _configStore;
private TimeSpan _getNextMessageRetryInterval;
private readonly TimeSpan _sessionCreationRetryInterval = TimeSpan.FromSeconds(30);
private readonly TimeSpan _sessionConflictRetryLimit = TimeSpan.FromMinutes(4);
private readonly TimeSpan _clockSkewRetryLimit = TimeSpan.FromMinutes(30);
private readonly Dictionary<string, int> _sessionCreationExceptionTracker = new Dictionary<string, int>();
// Whether load credentials from .credentials_migrated file
internal bool _useMigratedCredentials;
// need to check auth url if there is only .credentials and auth schema is OAuth
internal bool _needToCheckAuthorizationUrlUpdate;
internal Task<VssCredentials> _authorizationUrlMigrationBackgroundTask;
internal Task _authorizationUrlRollbackReattemptDelayBackgroundTask;
public override void Initialize(IHostContext hostContext)
{
base.Initialize(hostContext);
_term = HostContext.GetService<ITerminal>();
_runnerServer = HostContext.GetService<IRunnerServer>();
_credMgr = HostContext.GetService<ICredentialManager>();
_configStore = HostContext.GetService<IConfigurationStore>();
}
public async Task<Boolean> CreateSessionAsync(CancellationToken token)
@@ -58,8 +73,8 @@ namespace GitHub.Runner.Listener
// Create connection.
Trace.Info("Loading Credentials");
var credMgr = HostContext.GetService<ICredentialManager>();
VssCredentials creds = credMgr.LoadCredentials();
_useMigratedCredentials = !StringUtil.ConvertToBoolean(Environment.GetEnvironmentVariable("GITHUB_ACTIONS_RUNNER_SPSAUTHURL"));
VssCredentials creds = _credMgr.LoadCredentials(_useMigratedCredentials);
var agent = new TaskAgentReference
{
@@ -74,6 +89,17 @@ namespace GitHub.Runner.Listener
string errorMessage = string.Empty;
bool encounteringError = false;
var originalCreds = _configStore.GetCredentials();
var migratedCreds = _configStore.GetMigratedCredentials();
if (migratedCreds == null)
{
_useMigratedCredentials = false;
if (originalCreds.Scheme == Constants.Configuration.OAuth)
{
_needToCheckAuthorizationUrlUpdate = true;
}
}
while (true)
{
token.ThrowIfCancellationRequested();
@@ -101,6 +127,12 @@ namespace GitHub.Runner.Listener
encounteringError = false;
}
if (_needToCheckAuthorizationUrlUpdate)
{
// start background task try to get new authorization url
_authorizationUrlMigrationBackgroundTask = GetNewOAuthAuthorizationSetting(token);
}
return true;
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
@@ -119,10 +151,23 @@ namespace GitHub.Runner.Listener
Trace.Error(ex);
if (!IsSessionCreationExceptionRetriable(ex))
{
if (_useMigratedCredentials)
{
// migrated credentials might cause lose permission during permission check,
// we will force to use original credential and try again
_useMigratedCredentials = false;
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
creds = _credMgr.LoadCredentials(false);
Trace.Error("Fallback to original credentials and try again.");
}
else
{
_term.WriteError($"Failed to create session. {ex.Message}");
return false;
}
}
if (!encounteringError) //print the message only on the first error
{
@@ -182,6 +227,51 @@ namespace GitHub.Runner.Listener
encounteringError = false;
continuousError = 0;
}
if (_needToCheckAuthorizationUrlUpdate &&
_authorizationUrlMigrationBackgroundTask?.IsCompleted == true)
{
if (HostContext.GetService<IJobDispatcher>().Busy ||
HostContext.GetService<ISelfUpdater>().Busy)
{
Trace.Info("Job or runner updates in progress, update credentials next time.");
}
else
{
try
{
var newCred = await _authorizationUrlMigrationBackgroundTask;
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), newCred);
Trace.Info("Updated connection to use migrated credential for next GetMessage call.");
_useMigratedCredentials = true;
_authorizationUrlMigrationBackgroundTask = null;
_needToCheckAuthorizationUrlUpdate = false;
}
catch (Exception ex)
{
Trace.Error("Fail to refresh connection with new authorization url.");
Trace.Error(ex);
}
}
}
if (_authorizationUrlRollbackReattemptDelayBackgroundTask?.IsCompleted == true)
{
try
{
// we rolled back to use original creds about 2 days before, now it's a good time to try migrated creds again.
Trace.Info("Re-attempt to use migrated credential");
var migratedCreds = _credMgr.LoadCredentials();
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedCreds);
_useMigratedCredentials = true;
_authorizationUrlRollbackReattemptDelayBackgroundTask = null;
}
catch (Exception ex)
{
Trace.Error("Fail to refresh connection with new authorization url on rollback reattempt.");
Trace.Error(ex);
}
}
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
{
@@ -204,9 +294,23 @@ namespace GitHub.Runner.Listener
Trace.Info($"{nameof(TaskAgentSessionExpiredException)} received, recovered by recreate session.");
}
else if (!IsGetNextMessageExceptionRetriable(ex))
{
if (_useMigratedCredentials)
{
// migrated credentials might cause lose permission during permission check,
// we will force to use original credential and try again
_useMigratedCredentials = false;
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
var originalCreds = _credMgr.LoadCredentials(false);
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), originalCreds);
Trace.Error("Fallback to original credentials and try again.");
}
else
{
throw;
}
}
else
{
continuousError++;
@@ -397,5 +501,80 @@ namespace GitHub.Runner.Listener
return true;
}
}
private async Task<VssCredentials> GetNewOAuthAuthorizationSetting(CancellationToken token)
{
Trace.Info("Start checking oauth authorization url update.");
while (true)
{
var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromMinutes(30), TimeSpan.FromMinutes(45));
await HostContext.Delay(backoff, token);
try
{
var migratedAuthorizationUrl = await _runnerServer.GetRunnerAuthUrlAsync(_settings.PoolId, _settings.AgentId);
if (!string.IsNullOrEmpty(migratedAuthorizationUrl))
{
var credData = _configStore.GetCredentials();
var clientId = credData.Data.GetValueOrDefault("clientId", null);
var currentAuthorizationUrl = credData.Data.GetValueOrDefault("authorizationUrl", null);
Trace.Info($"Current authorization url: {currentAuthorizationUrl}, new authorization url: {migratedAuthorizationUrl}");
if (string.Equals(currentAuthorizationUrl, migratedAuthorizationUrl, StringComparison.OrdinalIgnoreCase))
{
// We don't need to update credentials.
Trace.Info("No needs to update authorization url");
await Task.Delay(TimeSpan.FromMilliseconds(-1), token);
}
var keyManager = HostContext.GetService<IRSAKeyManager>();
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
var migratedClientCredential = new VssOAuthJwtBearerClientCredential(clientId, migratedAuthorizationUrl, signingCredentials);
var migratedRunnerCredential = new VssOAuthCredential(new Uri(migratedAuthorizationUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, migratedClientCredential);
Trace.Info("Try connect service with Token Service OAuth endpoint.");
var runnerServer = HostContext.CreateService<IRunnerServer>();
await runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedRunnerCredential);
await runnerServer.GetAgentPoolsAsync();
Trace.Info($"Successfully connected service with new authorization url.");
var migratedCredData = new CredentialData
{
Scheme = Constants.Configuration.OAuth,
Data =
{
{ "clientId", clientId },
{ "authorizationUrl", migratedAuthorizationUrl },
{ "oauthEndpointUrl", migratedAuthorizationUrl },
},
};
_configStore.SaveMigratedCredential(migratedCredData);
return migratedRunnerCredential;
}
else
{
Trace.Verbose("No authorization url updates");
}
}
catch (Exception ex)
{
Trace.Error("Fail to get/test new authorization url.");
Trace.Error(ex);
try
{
await _runnerServer.ReportRunnerAuthUrlErrorAsync(_settings.PoolId, _settings.AgentId, ex.ToString());
}
catch (Exception e)
{
// best effort
Trace.Error("Fail to report the migration error");
Trace.Error(e);
}
}
}
}
}
}

View File

@@ -17,6 +17,7 @@ namespace GitHub.Runner.Listener
[ServiceLocator(Default = typeof(SelfUpdater))]
public interface ISelfUpdater : IRunnerService
{
bool Busy { get; }
Task<bool> SelfUpdate(AgentRefreshMessage updateMessage, IJobDispatcher jobDispatcher, bool restartInteractiveRunner, CancellationToken token);
}
@@ -31,6 +32,8 @@ namespace GitHub.Runner.Listener
private int _poolId;
private int _agentId;
public bool Busy { get; private set; }
public override void Initialize(IHostContext hostContext)
{
base.Initialize(hostContext);
@@ -44,6 +47,9 @@ namespace GitHub.Runner.Listener
}
public async Task<bool> SelfUpdate(AgentRefreshMessage updateMessage, IJobDispatcher jobDispatcher, bool restartInteractiveRunner, CancellationToken token)
{
Busy = true;
try
{
if (!await UpdateNeeded(updateMessage.TargetVersion, token))
{
@@ -92,6 +98,11 @@ namespace GitHub.Runner.Listener
return true;
}
finally
{
Busy = false;
}
}
private async Task<bool> UpdateNeeded(string targetVersion, CancellationToken token)
{

View File

@@ -779,5 +779,65 @@ namespace GitHub.DistributedTask.WebApi
userState: userState,
cancellationToken: cancellationToken);
}
/// <summary>
/// [Preview API]
/// </summary>
/// <param name="poolId"></param>
/// <param name="agentId"></param>
/// <param name="userState"></param>
/// <param name="cancellationToken">The cancellation token to cancel operation.</param>
public Task<String> GetAgentAuthUrlAsync(
int poolId,
int agentId,
object userState = null,
CancellationToken cancellationToken = default)
{
HttpMethod httpMethod = new HttpMethod("GET");
Guid locationId = new Guid("a82a119c-1e46-44b6-8d75-c82a79cf975b");
object routeValues = new { poolId = poolId, agentId = agentId };
return SendAsync<String>(
httpMethod,
locationId,
routeValues: routeValues,
version: new ApiResourceVersion(6.0, 1),
userState: userState,
cancellationToken: cancellationToken);
}
/// <summary>
/// [Preview API]
/// </summary>
/// <param name="poolId"></param>
/// <param name="agentId"></param>
/// <param name="error"></param>
/// <param name="userState"></param>
/// <param name="cancellationToken">The cancellation token to cancel operation.</param>
[EditorBrowsable(EditorBrowsableState.Never)]
public virtual async Task ReportAgentAuthUrlMigrationErrorAsync(
int poolId,
int agentId,
string error,
object userState = null,
CancellationToken cancellationToken = default)
{
HttpMethod httpMethod = new HttpMethod("POST");
Guid locationId = new Guid("a82a119c-1e46-44b6-8d75-c82a79cf975b");
object routeValues = new { poolId = poolId, agentId = agentId };
HttpContent content = new ObjectContent<string>(error, new VssJsonMediaTypeFormatter(true));
using (HttpResponseMessage response = await SendAsync(
httpMethod,
locationId,
routeValues: routeValues,
version: new ApiResourceVersion(6.0, 1),
userState: userState,
cancellationToken: cancellationToken,
content: content).ConfigureAwait(false))
{
return;
}
}
}
}

View File

@@ -260,5 +260,8 @@ namespace GitHub.DistributedTask.WebApi
public static readonly Guid CheckpointResourcesLocationId = new Guid(CheckpointResourcesLocationIdString);
public const String CheckpointResourcesResource = "references";
public static readonly Guid RunnerAuthUrl = new Guid("{A82A119C-1E46-44B6-8D75-C82A79CF975B}");
public const string RunnerAuthUrlResource = "authurl";
}
}

File diff suppressed because it is too large Load Diff