mirror of
https://github.com/actions/runner.git
synced 2025-12-12 05:37:01 +00:00
Switch to use token service instead of SPS for exchanging oauth token. (#325)
* Gracefully switch the runner to use Token Service instead of SPS. * PR feedback. * feedback2 * report error.
This commit is contained in:
@@ -78,8 +78,10 @@ namespace GitHub.Runner.Common
|
||||
bool IsServiceConfigured();
|
||||
bool HasCredentials();
|
||||
CredentialData GetCredentials();
|
||||
CredentialData GetMigratedCredentials();
|
||||
RunnerSettings GetSettings();
|
||||
void SaveCredential(CredentialData credential);
|
||||
void SaveMigratedCredential(CredentialData credential);
|
||||
void SaveSettings(RunnerSettings settings);
|
||||
void DeleteCredential();
|
||||
void DeleteSettings();
|
||||
@@ -90,9 +92,11 @@ namespace GitHub.Runner.Common
|
||||
private string _binPath;
|
||||
private string _configFilePath;
|
||||
private string _credFilePath;
|
||||
private string _migratedCredFilePath;
|
||||
private string _serviceConfigFilePath;
|
||||
|
||||
private CredentialData _creds;
|
||||
private CredentialData _migratedCreds;
|
||||
private RunnerSettings _settings;
|
||||
|
||||
public override void Initialize(IHostContext hostContext)
|
||||
@@ -114,6 +118,9 @@ namespace GitHub.Runner.Common
|
||||
_credFilePath = hostContext.GetConfigFile(WellKnownConfigFile.Credentials);
|
||||
Trace.Info("CredFilePath: {0}", _credFilePath);
|
||||
|
||||
_migratedCredFilePath = hostContext.GetConfigFile(WellKnownConfigFile.MigratedCredentials);
|
||||
Trace.Info("MigratedCredFilePath: {0}", _migratedCredFilePath);
|
||||
|
||||
_serviceConfigFilePath = hostContext.GetConfigFile(WellKnownConfigFile.Service);
|
||||
Trace.Info("ServiceConfigFilePath: {0}", _serviceConfigFilePath);
|
||||
}
|
||||
@@ -123,7 +130,7 @@ namespace GitHub.Runner.Common
|
||||
public bool HasCredentials()
|
||||
{
|
||||
Trace.Info("HasCredentials()");
|
||||
bool credsStored = (new FileInfo(_credFilePath)).Exists;
|
||||
bool credsStored = (new FileInfo(_credFilePath)).Exists || (new FileInfo(_migratedCredFilePath)).Exists;
|
||||
Trace.Info("stored {0}", credsStored);
|
||||
return credsStored;
|
||||
}
|
||||
@@ -154,6 +161,16 @@ namespace GitHub.Runner.Common
|
||||
return _creds;
|
||||
}
|
||||
|
||||
public CredentialData GetMigratedCredentials()
|
||||
{
|
||||
if (_migratedCreds == null && File.Exists(_migratedCredFilePath))
|
||||
{
|
||||
_migratedCreds = IOUtil.LoadObject<CredentialData>(_migratedCredFilePath);
|
||||
}
|
||||
|
||||
return _migratedCreds;
|
||||
}
|
||||
|
||||
public RunnerSettings GetSettings()
|
||||
{
|
||||
if (_settings == null)
|
||||
@@ -188,6 +205,21 @@ namespace GitHub.Runner.Common
|
||||
File.SetAttributes(_credFilePath, File.GetAttributes(_credFilePath) | FileAttributes.Hidden);
|
||||
}
|
||||
|
||||
public void SaveMigratedCredential(CredentialData credential)
|
||||
{
|
||||
Trace.Info("Saving {0} migrated credential @ {1}", credential.Scheme, _migratedCredFilePath);
|
||||
if (File.Exists(_migratedCredFilePath))
|
||||
{
|
||||
// Delete existing credential file first, since the file is hidden and not able to overwrite.
|
||||
Trace.Info("Delete exist runner migrated credential file.");
|
||||
IOUtil.DeleteFile(_migratedCredFilePath);
|
||||
}
|
||||
|
||||
IOUtil.SaveObject(credential, _migratedCredFilePath);
|
||||
Trace.Info("Migrated Credentials Saved.");
|
||||
File.SetAttributes(_migratedCredFilePath, File.GetAttributes(_migratedCredFilePath) | FileAttributes.Hidden);
|
||||
}
|
||||
|
||||
public void SaveSettings(RunnerSettings settings)
|
||||
{
|
||||
Trace.Info("Saving runner settings.");
|
||||
@@ -206,6 +238,7 @@ namespace GitHub.Runner.Common
|
||||
public void DeleteCredential()
|
||||
{
|
||||
IOUtil.Delete(_credFilePath, default(CancellationToken));
|
||||
IOUtil.Delete(_migratedCredFilePath, default(CancellationToken));
|
||||
}
|
||||
|
||||
public void DeleteSettings()
|
||||
|
||||
@@ -19,6 +19,7 @@ namespace GitHub.Runner.Common
|
||||
{
|
||||
Runner,
|
||||
Credentials,
|
||||
MigratedCredentials,
|
||||
RSACredentials,
|
||||
Service,
|
||||
CredentialStore,
|
||||
|
||||
@@ -281,6 +281,12 @@ namespace GitHub.Runner.Common
|
||||
".credentials");
|
||||
break;
|
||||
|
||||
case WellKnownConfigFile.MigratedCredentials:
|
||||
path = Path.Combine(
|
||||
GetDirectory(WellKnownDirectory.Root),
|
||||
".credentials_migrated");
|
||||
break;
|
||||
|
||||
case WellKnownConfigFile.RSACredentials:
|
||||
path = Path.Combine(
|
||||
GetDirectory(WellKnownDirectory.Root),
|
||||
|
||||
@@ -50,6 +50,10 @@ namespace GitHub.Runner.Common
|
||||
|
||||
// agent update
|
||||
Task<TaskAgent> UpdateAgentUpdateStateAsync(int agentPoolId, int agentId, string currentState);
|
||||
|
||||
// runner authorization url
|
||||
Task<string> GetRunnerAuthUrlAsync(int runnerPoolId, int runnerId);
|
||||
Task ReportRunnerAuthUrlErrorAsync(int runnerPoolId, int runnerId, string error);
|
||||
}
|
||||
|
||||
public sealed class RunnerServer : RunnerService, IRunnerServer
|
||||
@@ -334,5 +338,20 @@ namespace GitHub.Runner.Common
|
||||
CheckConnection(RunnerConnectionType.Generic);
|
||||
return _genericTaskAgentClient.UpdateAgentUpdateStateAsync(agentPoolId, agentId, currentState);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------
|
||||
// Runner Auth Url
|
||||
//-----------------------------------------------------------------
|
||||
public Task<string> GetRunnerAuthUrlAsync(int runnerPoolId, int runnerId)
|
||||
{
|
||||
CheckConnection(RunnerConnectionType.MessageQueue);
|
||||
return _messageTaskAgentClient.GetAgentAuthUrlAsync(runnerPoolId, runnerId);
|
||||
}
|
||||
|
||||
public Task ReportRunnerAuthUrlErrorAsync(int runnerPoolId, int runnerId, string error)
|
||||
{
|
||||
CheckConnection(RunnerConnectionType.MessageQueue);
|
||||
return _messageTaskAgentClient.ReportAgentAuthUrlMigrationErrorAsync(runnerPoolId, runnerId, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ namespace GitHub.Runner.Listener.Configuration
|
||||
public interface ICredentialManager : IRunnerService
|
||||
{
|
||||
ICredentialProvider GetCredentialProvider(string credType);
|
||||
VssCredentials LoadCredentials();
|
||||
VssCredentials LoadCredentials(bool preferMigrated = true);
|
||||
}
|
||||
|
||||
public class CredentialManager : RunnerService, ICredentialManager
|
||||
@@ -40,7 +40,7 @@ namespace GitHub.Runner.Listener.Configuration
|
||||
return creds;
|
||||
}
|
||||
|
||||
public VssCredentials LoadCredentials()
|
||||
public VssCredentials LoadCredentials(bool preferMigrated = true)
|
||||
{
|
||||
IConfigurationStore store = HostContext.GetService<IConfigurationStore>();
|
||||
|
||||
@@ -50,6 +50,16 @@ namespace GitHub.Runner.Listener.Configuration
|
||||
}
|
||||
|
||||
CredentialData credData = store.GetCredentials();
|
||||
|
||||
if (preferMigrated)
|
||||
{
|
||||
var migratedCred = store.GetMigratedCredentials();
|
||||
if (migratedCred != null)
|
||||
{
|
||||
credData = migratedCred;
|
||||
}
|
||||
}
|
||||
|
||||
ICredentialProvider credProv = GetCredentialProvider(credData.Scheme);
|
||||
credProv.CredentialData = credData;
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using GitHub.Runner.Common;
|
||||
using GitHub.Runner.Common.Util;
|
||||
using GitHub.Runner.Sdk;
|
||||
using GitHub.Services.Common;
|
||||
using GitHub.Services.OAuth;
|
||||
@@ -29,7 +28,7 @@ namespace GitHub.Runner.Listener.Configuration
|
||||
var authorizationUrl = this.CredentialData.Data.GetValueOrDefault("authorizationUrl", null);
|
||||
|
||||
// For back compat with .credential file that doesn't has 'oauthEndpointUrl' section
|
||||
var oathEndpointUrl = this.CredentialData.Data.GetValueOrDefault("oauthEndpointUrl", authorizationUrl);
|
||||
var oauthEndpointUrl = this.CredentialData.Data.GetValueOrDefault("oauthEndpointUrl", authorizationUrl);
|
||||
|
||||
ArgUtil.NotNullOrEmpty(clientId, nameof(clientId));
|
||||
ArgUtil.NotNullOrEmpty(authorizationUrl, nameof(authorizationUrl));
|
||||
@@ -39,7 +38,7 @@ namespace GitHub.Runner.Listener.Configuration
|
||||
var keyManager = context.GetService<IRSAKeyManager>();
|
||||
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
|
||||
var clientCredential = new VssOAuthJwtBearerClientCredential(clientId, authorizationUrl, signingCredentials);
|
||||
var agentCredential = new VssOAuthCredential(new Uri(oathEndpointUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, clientCredential);
|
||||
var agentCredential = new VssOAuthCredential(new Uri(oauthEndpointUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, clientCredential);
|
||||
|
||||
// Construct a credentials cache with a single OAuth credential for communication. The windows credential
|
||||
// is explicitly set to null to ensure we never do that negotiation.
|
||||
|
||||
@@ -18,6 +18,7 @@ namespace GitHub.Runner.Listener
|
||||
[ServiceLocator(Default = typeof(JobDispatcher))]
|
||||
public interface IJobDispatcher : IRunnerService
|
||||
{
|
||||
bool Busy { get; }
|
||||
TaskCompletionSource<bool> RunOnceJobCompleted { get; }
|
||||
void Run(Pipelines.AgentJobRequestMessage message, bool runOnce = false);
|
||||
bool Cancel(JobCancelMessage message);
|
||||
@@ -69,6 +70,8 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
public TaskCompletionSource<bool> RunOnceJobCompleted => _runOnceJobCompleted;
|
||||
|
||||
public bool Busy { get; private set; }
|
||||
|
||||
public void Run(Pipelines.AgentJobRequestMessage jobRequestMessage, bool runOnce = false)
|
||||
{
|
||||
Trace.Info($"Job request {jobRequestMessage.RequestId} for plan {jobRequestMessage.Plan.PlanId} job {jobRequestMessage.JobId} received.");
|
||||
@@ -247,7 +250,7 @@ namespace GitHub.Runner.Listener
|
||||
Task completedTask = await Task.WhenAny(jobDispatch.WorkerDispatch, Task.Delay(TimeSpan.FromSeconds(45)));
|
||||
if (completedTask != jobDispatch.WorkerDispatch)
|
||||
{
|
||||
// at this point, the job exectuion might encounter some dead lock and even not able to be canclled.
|
||||
// at this point, the job execution might encounter some dead lock and even not able to be cancelled.
|
||||
// no need to localize the exception string should never happen.
|
||||
throw new InvalidOperationException($"Job dispatch process for {jobDispatch.JobId} has encountered unexpected error, the dispatch task is not able to be canceled within 45 seconds.");
|
||||
}
|
||||
@@ -296,190 +299,290 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
private async Task RunAsync(Pipelines.AgentJobRequestMessage message, WorkerDispatcher previousJobDispatch, CancellationToken jobRequestCancellationToken, CancellationToken workerCancelTimeoutKillToken)
|
||||
{
|
||||
if (previousJobDispatch != null)
|
||||
Busy = true;
|
||||
try
|
||||
{
|
||||
Trace.Verbose($"Make sure the previous job request {previousJobDispatch.JobId} has successfully finished on worker.");
|
||||
await EnsureDispatchFinished(previousJobDispatch);
|
||||
}
|
||||
else
|
||||
{
|
||||
Trace.Verbose($"This is the first job request.");
|
||||
}
|
||||
|
||||
var term = HostContext.GetService<ITerminal>();
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Running job: {message.JobDisplayName}");
|
||||
|
||||
// first job request renew succeed.
|
||||
TaskCompletionSource<int> firstJobRequestRenewed = new TaskCompletionSource<int>();
|
||||
var notification = HostContext.GetService<IJobNotification>();
|
||||
|
||||
// lock renew cancellation token.
|
||||
using (var lockRenewalTokenSource = new CancellationTokenSource())
|
||||
using (var workerProcessCancelTokenSource = new CancellationTokenSource())
|
||||
{
|
||||
long requestId = message.RequestId;
|
||||
Guid lockToken = Guid.Empty; // lockToken has never been used, keep this here of compat
|
||||
|
||||
// start renew job request
|
||||
Trace.Info($"Start renew job request {requestId} for job {message.JobId}.");
|
||||
Task renewJobRequest = RenewJobRequestAsync(_poolId, requestId, lockToken, firstJobRequestRenewed, lockRenewalTokenSource.Token);
|
||||
|
||||
// wait till first renew succeed or job request is canceled
|
||||
// not even start worker if the first renew fail
|
||||
await Task.WhenAny(firstJobRequestRenewed.Task, renewJobRequest, Task.Delay(-1, jobRequestCancellationToken));
|
||||
|
||||
if (renewJobRequest.IsCompleted)
|
||||
if (previousJobDispatch != null)
|
||||
{
|
||||
// renew job request task complete means we run out of retry for the first job request renew.
|
||||
Trace.Info($"Unable to renew job request for job {message.JobId} for the first time, stop dispatching job to worker.");
|
||||
return;
|
||||
Trace.Verbose($"Make sure the previous job request {previousJobDispatch.JobId} has successfully finished on worker.");
|
||||
await EnsureDispatchFinished(previousJobDispatch);
|
||||
}
|
||||
else
|
||||
{
|
||||
Trace.Verbose($"This is the first job request.");
|
||||
}
|
||||
|
||||
if (jobRequestCancellationToken.IsCancellationRequested)
|
||||
var term = HostContext.GetService<ITerminal>();
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Running job: {message.JobDisplayName}");
|
||||
|
||||
// first job request renew succeed.
|
||||
TaskCompletionSource<int> firstJobRequestRenewed = new TaskCompletionSource<int>();
|
||||
var notification = HostContext.GetService<IJobNotification>();
|
||||
|
||||
// lock renew cancellation token.
|
||||
using (var lockRenewalTokenSource = new CancellationTokenSource())
|
||||
using (var workerProcessCancelTokenSource = new CancellationTokenSource())
|
||||
{
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
lockRenewalTokenSource.Cancel();
|
||||
// renew job request should never blows up.
|
||||
await renewJobRequest;
|
||||
long requestId = message.RequestId;
|
||||
Guid lockToken = Guid.Empty; // lockToken has never been used, keep this here of compat
|
||||
|
||||
// complete job request with result Cancelled
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, TaskResult.Canceled);
|
||||
return;
|
||||
}
|
||||
// start renew job request
|
||||
Trace.Info($"Start renew job request {requestId} for job {message.JobId}.");
|
||||
Task renewJobRequest = RenewJobRequestAsync(_poolId, requestId, lockToken, firstJobRequestRenewed, lockRenewalTokenSource.Token);
|
||||
|
||||
HostContext.WritePerfCounter($"JobRequestRenewed_{requestId.ToString()}");
|
||||
// wait till first renew succeed or job request is canceled
|
||||
// not even start worker if the first renew fail
|
||||
await Task.WhenAny(firstJobRequestRenewed.Task, renewJobRequest, Task.Delay(-1, jobRequestCancellationToken));
|
||||
|
||||
Task<int> workerProcessTask = null;
|
||||
object _outputLock = new object();
|
||||
List<string> workerOutput = new List<string>();
|
||||
using (var processChannel = HostContext.CreateService<IProcessChannel>())
|
||||
using (var processInvoker = HostContext.CreateService<IProcessInvoker>())
|
||||
{
|
||||
// Start the process channel.
|
||||
// It's OK if StartServer bubbles an execption after the worker process has already started.
|
||||
// The worker will shutdown after 30 seconds if it hasn't received the job message.
|
||||
processChannel.StartServer(
|
||||
// Delegate to start the child process.
|
||||
startProcess: (string pipeHandleOut, string pipeHandleIn) =>
|
||||
{
|
||||
// Validate args.
|
||||
ArgUtil.NotNullOrEmpty(pipeHandleOut, nameof(pipeHandleOut));
|
||||
ArgUtil.NotNullOrEmpty(pipeHandleIn, nameof(pipeHandleIn));
|
||||
|
||||
// Save STDOUT from worker, worker will use STDOUT report unhandle exception.
|
||||
processInvoker.OutputDataReceived += delegate (object sender, ProcessDataReceivedEventArgs stdout)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(stdout.Data))
|
||||
{
|
||||
lock (_outputLock)
|
||||
{
|
||||
workerOutput.Add(stdout.Data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Save STDERR from worker, worker will use STDERR on crash.
|
||||
processInvoker.ErrorDataReceived += delegate (object sender, ProcessDataReceivedEventArgs stderr)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(stderr.Data))
|
||||
{
|
||||
lock (_outputLock)
|
||||
{
|
||||
workerOutput.Add(stderr.Data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Start the child process.
|
||||
HostContext.WritePerfCounter("StartingWorkerProcess");
|
||||
var assemblyDirectory = HostContext.GetDirectory(WellKnownDirectory.Bin);
|
||||
string workerFileName = Path.Combine(assemblyDirectory, _workerProcessName);
|
||||
workerProcessTask = processInvoker.ExecuteAsync(
|
||||
workingDirectory: assemblyDirectory,
|
||||
fileName: workerFileName,
|
||||
arguments: "spawnclient " + pipeHandleOut + " " + pipeHandleIn,
|
||||
environment: null,
|
||||
requireExitCodeZero: false,
|
||||
outputEncoding: null,
|
||||
killProcessOnCancel: true,
|
||||
redirectStandardIn: null,
|
||||
inheritConsoleHandler: false,
|
||||
keepStandardInOpen: false,
|
||||
highPriorityProcess: true,
|
||||
cancellationToken: workerProcessCancelTokenSource.Token);
|
||||
});
|
||||
|
||||
// Send the job request message.
|
||||
// Kill the worker process if sending the job message times out. The worker
|
||||
// process may have successfully received the job message.
|
||||
try
|
||||
if (renewJobRequest.IsCompleted)
|
||||
{
|
||||
Trace.Info($"Send job request message to worker for job {message.JobId}.");
|
||||
HostContext.WritePerfCounter($"RunnerSendingJobToWorker_{message.JobId}");
|
||||
using (var csSendJobRequest = new CancellationTokenSource(_channelTimeout))
|
||||
{
|
||||
await processChannel.SendAsync(
|
||||
messageType: MessageType.NewJobRequest,
|
||||
body: JsonUtility.ToString(message),
|
||||
cancellationToken: csSendJobRequest.Token);
|
||||
}
|
||||
// renew job request task complete means we run out of retry for the first job request renew.
|
||||
Trace.Info($"Unable to renew job request for job {message.JobId} for the first time, stop dispatching job to worker.");
|
||||
return;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// message send been cancelled.
|
||||
// timeout 30 sec. kill worker.
|
||||
Trace.Info($"Job request message sending for job {message.JobId} been cancelled, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
|
||||
if (jobRequestCancellationToken.IsCancellationRequested)
|
||||
{
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
lockRenewalTokenSource.Cancel();
|
||||
// renew job request should never blows up.
|
||||
await renewJobRequest;
|
||||
|
||||
// not finish the job request since the job haven't run on worker at all, we will not going to set a result to server.
|
||||
// complete job request with result Cancelled
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, TaskResult.Canceled);
|
||||
return;
|
||||
}
|
||||
|
||||
// we get first jobrequest renew succeed and start the worker process with the job message.
|
||||
// send notification to machine provisioner.
|
||||
var systemConnection = message.Resources.Endpoints.SingleOrDefault(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase));
|
||||
var accessToken = systemConnection?.Authorization?.Parameters["AccessToken"];
|
||||
notification.JobStarted(message.JobId, accessToken, systemConnection.Url);
|
||||
HostContext.WritePerfCounter($"JobRequestRenewed_{requestId.ToString()}");
|
||||
|
||||
HostContext.WritePerfCounter($"SentJobToWorker_{requestId.ToString()}");
|
||||
|
||||
try
|
||||
Task<int> workerProcessTask = null;
|
||||
object _outputLock = new object();
|
||||
List<string> workerOutput = new List<string>();
|
||||
using (var processChannel = HostContext.CreateService<IProcessChannel>())
|
||||
using (var processInvoker = HostContext.CreateService<IProcessInvoker>())
|
||||
{
|
||||
TaskResult resultOnAbandonOrCancel = TaskResult.Succeeded;
|
||||
// wait for renewlock, worker process or cancellation token been fired.
|
||||
var completedTask = await Task.WhenAny(renewJobRequest, workerProcessTask, Task.Delay(-1, jobRequestCancellationToken));
|
||||
if (completedTask == workerProcessTask)
|
||||
{
|
||||
// worker finished successfully, complete job request with result, attach unhandled exception reported by worker, stop renew lock, job has finished.
|
||||
int returnCode = await workerProcessTask;
|
||||
Trace.Info($"Worker finished for job {message.JobId}. Code: " + returnCode);
|
||||
|
||||
string detailInfo = null;
|
||||
if (!TaskResultUtil.IsValidReturnCode(returnCode))
|
||||
// Start the process channel.
|
||||
// It's OK if StartServer bubbles an execption after the worker process has already started.
|
||||
// The worker will shutdown after 30 seconds if it hasn't received the job message.
|
||||
processChannel.StartServer(
|
||||
// Delegate to start the child process.
|
||||
startProcess: (string pipeHandleOut, string pipeHandleIn) =>
|
||||
{
|
||||
detailInfo = string.Join(Environment.NewLine, workerOutput);
|
||||
Trace.Info($"Return code {returnCode} indicate worker encounter an unhandled exception or app crash, attach worker stdout/stderr to JobRequest result.");
|
||||
await LogWorkerProcessUnhandledException(message, detailInfo);
|
||||
// Validate args.
|
||||
ArgUtil.NotNullOrEmpty(pipeHandleOut, nameof(pipeHandleOut));
|
||||
ArgUtil.NotNullOrEmpty(pipeHandleIn, nameof(pipeHandleIn));
|
||||
|
||||
// Save STDOUT from worker, worker will use STDOUT report unhandle exception.
|
||||
processInvoker.OutputDataReceived += delegate (object sender, ProcessDataReceivedEventArgs stdout)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(stdout.Data))
|
||||
{
|
||||
lock (_outputLock)
|
||||
{
|
||||
workerOutput.Add(stdout.Data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Save STDERR from worker, worker will use STDERR on crash.
|
||||
processInvoker.ErrorDataReceived += delegate (object sender, ProcessDataReceivedEventArgs stderr)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(stderr.Data))
|
||||
{
|
||||
lock (_outputLock)
|
||||
{
|
||||
workerOutput.Add(stderr.Data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Start the child process.
|
||||
HostContext.WritePerfCounter("StartingWorkerProcess");
|
||||
var assemblyDirectory = HostContext.GetDirectory(WellKnownDirectory.Bin);
|
||||
string workerFileName = Path.Combine(assemblyDirectory, _workerProcessName);
|
||||
workerProcessTask = processInvoker.ExecuteAsync(
|
||||
workingDirectory: assemblyDirectory,
|
||||
fileName: workerFileName,
|
||||
arguments: "spawnclient " + pipeHandleOut + " " + pipeHandleIn,
|
||||
environment: null,
|
||||
requireExitCodeZero: false,
|
||||
outputEncoding: null,
|
||||
killProcessOnCancel: true,
|
||||
redirectStandardIn: null,
|
||||
inheritConsoleHandler: false,
|
||||
keepStandardInOpen: false,
|
||||
highPriorityProcess: true,
|
||||
cancellationToken: workerProcessCancelTokenSource.Token);
|
||||
});
|
||||
|
||||
// Send the job request message.
|
||||
// Kill the worker process if sending the job message times out. The worker
|
||||
// process may have successfully received the job message.
|
||||
try
|
||||
{
|
||||
Trace.Info($"Send job request message to worker for job {message.JobId}.");
|
||||
HostContext.WritePerfCounter($"RunnerSendingJobToWorker_{message.JobId}");
|
||||
using (var csSendJobRequest = new CancellationTokenSource(_channelTimeout))
|
||||
{
|
||||
await processChannel.SendAsync(
|
||||
messageType: MessageType.NewJobRequest,
|
||||
body: JsonUtility.ToString(message),
|
||||
cancellationToken: csSendJobRequest.Token);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// message send been cancelled.
|
||||
// timeout 30 sec. kill worker.
|
||||
Trace.Info($"Job request message sending for job {message.JobId} been cancelled, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
|
||||
TaskResult result = TaskResultUtil.TranslateFromReturnCode(returnCode);
|
||||
Trace.Info($"finish job request for job {message.JobId} with result: {result}");
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Job {message.JobDisplayName} completed with result: {result}");
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
lockRenewalTokenSource.Cancel();
|
||||
// renew job request should never blows up.
|
||||
await renewJobRequest;
|
||||
|
||||
// not finish the job request since the job haven't run on worker at all, we will not going to set a result to server.
|
||||
return;
|
||||
}
|
||||
|
||||
// we get first jobrequest renew succeed and start the worker process with the job message.
|
||||
// send notification to machine provisioner.
|
||||
var systemConnection = message.Resources.Endpoints.SingleOrDefault(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase));
|
||||
var accessToken = systemConnection?.Authorization?.Parameters["AccessToken"];
|
||||
notification.JobStarted(message.JobId, accessToken, systemConnection.Url);
|
||||
|
||||
HostContext.WritePerfCounter($"SentJobToWorker_{requestId.ToString()}");
|
||||
|
||||
try
|
||||
{
|
||||
TaskResult resultOnAbandonOrCancel = TaskResult.Succeeded;
|
||||
// wait for renewlock, worker process or cancellation token been fired.
|
||||
var completedTask = await Task.WhenAny(renewJobRequest, workerProcessTask, Task.Delay(-1, jobRequestCancellationToken));
|
||||
if (completedTask == workerProcessTask)
|
||||
{
|
||||
// worker finished successfully, complete job request with result, attach unhandled exception reported by worker, stop renew lock, job has finished.
|
||||
int returnCode = await workerProcessTask;
|
||||
Trace.Info($"Worker finished for job {message.JobId}. Code: " + returnCode);
|
||||
|
||||
string detailInfo = null;
|
||||
if (!TaskResultUtil.IsValidReturnCode(returnCode))
|
||||
{
|
||||
detailInfo = string.Join(Environment.NewLine, workerOutput);
|
||||
Trace.Info($"Return code {returnCode} indicate worker encounter an unhandled exception or app crash, attach worker stdout/stderr to JobRequest result.");
|
||||
await LogWorkerProcessUnhandledException(message, detailInfo);
|
||||
}
|
||||
|
||||
TaskResult result = TaskResultUtil.TranslateFromReturnCode(returnCode);
|
||||
Trace.Info($"finish job request for job {message.JobId} with result: {result}");
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Job {message.JobDisplayName} completed with result: {result}");
|
||||
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
lockRenewalTokenSource.Cancel();
|
||||
// renew job request should never blows up.
|
||||
await renewJobRequest;
|
||||
|
||||
// complete job request
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, result, detailInfo);
|
||||
|
||||
// print out unhandled exception happened in worker after we complete job request.
|
||||
// when we run out of disk space, report back to server has higher priority.
|
||||
if (!string.IsNullOrEmpty(detailInfo))
|
||||
{
|
||||
Trace.Error("Unhandled exception happened in worker:");
|
||||
Trace.Error(detailInfo);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
else if (completedTask == renewJobRequest)
|
||||
{
|
||||
resultOnAbandonOrCancel = TaskResult.Abandoned;
|
||||
}
|
||||
else
|
||||
{
|
||||
resultOnAbandonOrCancel = TaskResult.Canceled;
|
||||
}
|
||||
|
||||
// renew job request completed or job request cancellation token been fired for RunAsync(jobrequestmessage)
|
||||
// cancel worker gracefully first, then kill it after worker cancel timeout
|
||||
try
|
||||
{
|
||||
Trace.Info($"Send job cancellation message to worker for job {message.JobId}.");
|
||||
using (var csSendCancel = new CancellationTokenSource(_channelTimeout))
|
||||
{
|
||||
var messageType = MessageType.CancelRequest;
|
||||
if (HostContext.RunnerShutdownToken.IsCancellationRequested)
|
||||
{
|
||||
switch (HostContext.RunnerShutdownReason)
|
||||
{
|
||||
case ShutdownReason.UserCancelled:
|
||||
messageType = MessageType.RunnerShutdown;
|
||||
break;
|
||||
case ShutdownReason.OperatingSystemShutdown:
|
||||
messageType = MessageType.OperatingSystemShutdown;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
await processChannel.SendAsync(
|
||||
messageType: messageType,
|
||||
body: string.Empty,
|
||||
cancellationToken: csSendCancel.Token);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// message send been cancelled.
|
||||
Trace.Info($"Job cancel message sending for job {message.JobId} been cancelled, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
}
|
||||
|
||||
// wait worker to exit
|
||||
// if worker doesn't exit within timeout, then kill worker.
|
||||
completedTask = await Task.WhenAny(workerProcessTask, Task.Delay(-1, workerCancelTimeoutKillToken));
|
||||
|
||||
// worker haven't exit within cancellation timeout.
|
||||
if (completedTask != workerProcessTask)
|
||||
{
|
||||
Trace.Info($"worker process for job {message.JobId} haven't exit within cancellation timout, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
|
||||
// When worker doesn't exit within cancel timeout, the runner will kill the worker process and worker won't finish upload job logs.
|
||||
// The runner will try to upload these logs at this time.
|
||||
await TryUploadUnfinishedLogs(message);
|
||||
}
|
||||
|
||||
Trace.Info($"finish job request for job {message.JobId} with result: {resultOnAbandonOrCancel}");
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Job {message.JobDisplayName} completed with result: {resultOnAbandonOrCancel}");
|
||||
// complete job request with cancel result, stop renew lock, job has finished.
|
||||
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
@@ -488,112 +591,20 @@ namespace GitHub.Runner.Listener
|
||||
await renewJobRequest;
|
||||
|
||||
// complete job request
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, result, detailInfo);
|
||||
|
||||
// print out unhandled exception happened in worker after we complete job request.
|
||||
// when we run out of disk space, report back to server has higher priority.
|
||||
if (!string.IsNullOrEmpty(detailInfo))
|
||||
{
|
||||
Trace.Error("Unhandled exception happened in worker:");
|
||||
Trace.Error(detailInfo);
|
||||
}
|
||||
|
||||
return;
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, resultOnAbandonOrCancel);
|
||||
}
|
||||
else if (completedTask == renewJobRequest)
|
||||
finally
|
||||
{
|
||||
resultOnAbandonOrCancel = TaskResult.Abandoned;
|
||||
// This should be the last thing to run so we don't notify external parties until actually finished
|
||||
await notification.JobCompleted(message.JobId);
|
||||
}
|
||||
else
|
||||
{
|
||||
resultOnAbandonOrCancel = TaskResult.Canceled;
|
||||
}
|
||||
|
||||
// renew job request completed or job request cancellation token been fired for RunAsync(jobrequestmessage)
|
||||
// cancel worker gracefully first, then kill it after worker cancel timeout
|
||||
try
|
||||
{
|
||||
Trace.Info($"Send job cancellation message to worker for job {message.JobId}.");
|
||||
using (var csSendCancel = new CancellationTokenSource(_channelTimeout))
|
||||
{
|
||||
var messageType = MessageType.CancelRequest;
|
||||
if (HostContext.RunnerShutdownToken.IsCancellationRequested)
|
||||
{
|
||||
switch (HostContext.RunnerShutdownReason)
|
||||
{
|
||||
case ShutdownReason.UserCancelled:
|
||||
messageType = MessageType.RunnerShutdown;
|
||||
break;
|
||||
case ShutdownReason.OperatingSystemShutdown:
|
||||
messageType = MessageType.OperatingSystemShutdown;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
await processChannel.SendAsync(
|
||||
messageType: messageType,
|
||||
body: string.Empty,
|
||||
cancellationToken: csSendCancel.Token);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// message send been cancelled.
|
||||
Trace.Info($"Job cancel message sending for job {message.JobId} been cancelled, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
}
|
||||
|
||||
// wait worker to exit
|
||||
// if worker doesn't exit within timeout, then kill worker.
|
||||
completedTask = await Task.WhenAny(workerProcessTask, Task.Delay(-1, workerCancelTimeoutKillToken));
|
||||
|
||||
// worker haven't exit within cancellation timeout.
|
||||
if (completedTask != workerProcessTask)
|
||||
{
|
||||
Trace.Info($"worker process for job {message.JobId} haven't exit within cancellation timout, kill running worker.");
|
||||
workerProcessCancelTokenSource.Cancel();
|
||||
try
|
||||
{
|
||||
await workerProcessTask;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Trace.Info("worker process has been killed.");
|
||||
}
|
||||
|
||||
// When worker doesn't exit within cancel timeout, the runner will kill the worker process and worker won't finish upload job logs.
|
||||
// The runner will try to upload these logs at this time.
|
||||
await TryUploadUnfinishedLogs(message);
|
||||
}
|
||||
|
||||
Trace.Info($"finish job request for job {message.JobId} with result: {resultOnAbandonOrCancel}");
|
||||
term.WriteLine($"{DateTime.UtcNow:u}: Job {message.JobDisplayName} completed with result: {resultOnAbandonOrCancel}");
|
||||
// complete job request with cancel result, stop renew lock, job has finished.
|
||||
|
||||
Trace.Info($"Stop renew job request for job {message.JobId}.");
|
||||
// stop renew lock
|
||||
lockRenewalTokenSource.Cancel();
|
||||
// renew job request should never blows up.
|
||||
await renewJobRequest;
|
||||
|
||||
// complete job request
|
||||
await CompleteJobRequestAsync(_poolId, message, lockToken, resultOnAbandonOrCancel);
|
||||
}
|
||||
finally
|
||||
{
|
||||
// This should be the last thing to run so we don't notify external parties until actually finished
|
||||
await notification.JobCompleted(message.JobId);
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
Busy = false;
|
||||
}
|
||||
}
|
||||
|
||||
public async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockToken, TaskCompletionSource<int> firstJobRequestRenewed, CancellationToken token)
|
||||
|
||||
@@ -13,7 +13,10 @@ using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using GitHub.Runner.Common;
|
||||
using GitHub.Runner.Sdk;
|
||||
using GitHub.Services.WebApi;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
[assembly: InternalsVisibleTo("Test")]
|
||||
namespace GitHub.Runner.Listener
|
||||
{
|
||||
[ServiceLocator(Default = typeof(MessageListener))]
|
||||
@@ -32,18 +35,30 @@ namespace GitHub.Runner.Listener
|
||||
private ITerminal _term;
|
||||
private IRunnerServer _runnerServer;
|
||||
private TaskAgentSession _session;
|
||||
private ICredentialManager _credMgr;
|
||||
private IConfigurationStore _configStore;
|
||||
private TimeSpan _getNextMessageRetryInterval;
|
||||
private readonly TimeSpan _sessionCreationRetryInterval = TimeSpan.FromSeconds(30);
|
||||
private readonly TimeSpan _sessionConflictRetryLimit = TimeSpan.FromMinutes(4);
|
||||
private readonly TimeSpan _clockSkewRetryLimit = TimeSpan.FromMinutes(30);
|
||||
private readonly Dictionary<string, int> _sessionCreationExceptionTracker = new Dictionary<string, int>();
|
||||
|
||||
// Whether load credentials from .credentials_migrated file
|
||||
internal bool _useMigratedCredentials;
|
||||
|
||||
// need to check auth url if there is only .credentials and auth schema is OAuth
|
||||
internal bool _needToCheckAuthorizationUrlUpdate;
|
||||
internal Task<VssCredentials> _authorizationUrlMigrationBackgroundTask;
|
||||
internal Task _authorizationUrlRollbackReattemptDelayBackgroundTask;
|
||||
|
||||
public override void Initialize(IHostContext hostContext)
|
||||
{
|
||||
base.Initialize(hostContext);
|
||||
|
||||
_term = HostContext.GetService<ITerminal>();
|
||||
_runnerServer = HostContext.GetService<IRunnerServer>();
|
||||
_credMgr = HostContext.GetService<ICredentialManager>();
|
||||
_configStore = HostContext.GetService<IConfigurationStore>();
|
||||
}
|
||||
|
||||
public async Task<Boolean> CreateSessionAsync(CancellationToken token)
|
||||
@@ -58,8 +73,8 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
// Create connection.
|
||||
Trace.Info("Loading Credentials");
|
||||
var credMgr = HostContext.GetService<ICredentialManager>();
|
||||
VssCredentials creds = credMgr.LoadCredentials();
|
||||
_useMigratedCredentials = !StringUtil.ConvertToBoolean(Environment.GetEnvironmentVariable("GITHUB_ACTIONS_RUNNER_SPSAUTHURL"));
|
||||
VssCredentials creds = _credMgr.LoadCredentials(_useMigratedCredentials);
|
||||
|
||||
var agent = new TaskAgentReference
|
||||
{
|
||||
@@ -74,6 +89,17 @@ namespace GitHub.Runner.Listener
|
||||
string errorMessage = string.Empty;
|
||||
bool encounteringError = false;
|
||||
|
||||
var originalCreds = _configStore.GetCredentials();
|
||||
var migratedCreds = _configStore.GetMigratedCredentials();
|
||||
if (migratedCreds == null)
|
||||
{
|
||||
_useMigratedCredentials = false;
|
||||
if (originalCreds.Scheme == Constants.Configuration.OAuth)
|
||||
{
|
||||
_needToCheckAuthorizationUrlUpdate = true;
|
||||
}
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
token.ThrowIfCancellationRequested();
|
||||
@@ -83,7 +109,7 @@ namespace GitHub.Runner.Listener
|
||||
Trace.Info("Connecting to the Runner Server...");
|
||||
await _runnerServer.ConnectAsync(new Uri(serverUrl), creds);
|
||||
Trace.Info("VssConnection created");
|
||||
|
||||
|
||||
_term.WriteLine();
|
||||
_term.WriteSuccessMessage("Connected to GitHub");
|
||||
_term.WriteLine();
|
||||
@@ -101,6 +127,12 @@ namespace GitHub.Runner.Listener
|
||||
encounteringError = false;
|
||||
}
|
||||
|
||||
if (_needToCheckAuthorizationUrlUpdate)
|
||||
{
|
||||
// start background task try to get new authorization url
|
||||
_authorizationUrlMigrationBackgroundTask = GetNewOAuthAuthorizationSetting(token);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (OperationCanceledException) when (token.IsCancellationRequested)
|
||||
@@ -120,8 +152,21 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
if (!IsSessionCreationExceptionRetriable(ex))
|
||||
{
|
||||
_term.WriteError($"Failed to create session. {ex.Message}");
|
||||
return false;
|
||||
if (_useMigratedCredentials)
|
||||
{
|
||||
// migrated credentials might cause lose permission during permission check,
|
||||
// we will force to use original credential and try again
|
||||
_useMigratedCredentials = false;
|
||||
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
|
||||
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
|
||||
creds = _credMgr.LoadCredentials(false);
|
||||
Trace.Error("Fallback to original credentials and try again.");
|
||||
}
|
||||
else
|
||||
{
|
||||
_term.WriteError($"Failed to create session. {ex.Message}");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!encounteringError) //print the message only on the first error
|
||||
@@ -182,6 +227,51 @@ namespace GitHub.Runner.Listener
|
||||
encounteringError = false;
|
||||
continuousError = 0;
|
||||
}
|
||||
|
||||
if (_needToCheckAuthorizationUrlUpdate &&
|
||||
_authorizationUrlMigrationBackgroundTask?.IsCompleted == true)
|
||||
{
|
||||
if (HostContext.GetService<IJobDispatcher>().Busy ||
|
||||
HostContext.GetService<ISelfUpdater>().Busy)
|
||||
{
|
||||
Trace.Info("Job or runner updates in progress, update credentials next time.");
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
var newCred = await _authorizationUrlMigrationBackgroundTask;
|
||||
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), newCred);
|
||||
Trace.Info("Updated connection to use migrated credential for next GetMessage call.");
|
||||
_useMigratedCredentials = true;
|
||||
_authorizationUrlMigrationBackgroundTask = null;
|
||||
_needToCheckAuthorizationUrlUpdate = false;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Trace.Error("Fail to refresh connection with new authorization url.");
|
||||
Trace.Error(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (_authorizationUrlRollbackReattemptDelayBackgroundTask?.IsCompleted == true)
|
||||
{
|
||||
try
|
||||
{
|
||||
// we rolled back to use original creds about 2 days before, now it's a good time to try migrated creds again.
|
||||
Trace.Info("Re-attempt to use migrated credential");
|
||||
var migratedCreds = _credMgr.LoadCredentials();
|
||||
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedCreds);
|
||||
_useMigratedCredentials = true;
|
||||
_authorizationUrlRollbackReattemptDelayBackgroundTask = null;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Trace.Error("Fail to refresh connection with new authorization url on rollback reattempt.");
|
||||
Trace.Error(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (token.IsCancellationRequested)
|
||||
{
|
||||
@@ -205,7 +295,21 @@ namespace GitHub.Runner.Listener
|
||||
}
|
||||
else if (!IsGetNextMessageExceptionRetriable(ex))
|
||||
{
|
||||
throw;
|
||||
if (_useMigratedCredentials)
|
||||
{
|
||||
// migrated credentials might cause lose permission during permission check,
|
||||
// we will force to use original credential and try again
|
||||
_useMigratedCredentials = false;
|
||||
var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
|
||||
_authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
|
||||
var originalCreds = _credMgr.LoadCredentials(false);
|
||||
await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), originalCreds);
|
||||
Trace.Error("Fallback to original credentials and try again.");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -397,5 +501,80 @@ namespace GitHub.Runner.Listener
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<VssCredentials> GetNewOAuthAuthorizationSetting(CancellationToken token)
|
||||
{
|
||||
Trace.Info("Start checking oauth authorization url update.");
|
||||
while (true)
|
||||
{
|
||||
var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromMinutes(30), TimeSpan.FromMinutes(45));
|
||||
await HostContext.Delay(backoff, token);
|
||||
|
||||
try
|
||||
{
|
||||
var migratedAuthorizationUrl = await _runnerServer.GetRunnerAuthUrlAsync(_settings.PoolId, _settings.AgentId);
|
||||
if (!string.IsNullOrEmpty(migratedAuthorizationUrl))
|
||||
{
|
||||
var credData = _configStore.GetCredentials();
|
||||
var clientId = credData.Data.GetValueOrDefault("clientId", null);
|
||||
var currentAuthorizationUrl = credData.Data.GetValueOrDefault("authorizationUrl", null);
|
||||
Trace.Info($"Current authorization url: {currentAuthorizationUrl}, new authorization url: {migratedAuthorizationUrl}");
|
||||
|
||||
if (string.Equals(currentAuthorizationUrl, migratedAuthorizationUrl, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// We don't need to update credentials.
|
||||
Trace.Info("No needs to update authorization url");
|
||||
await Task.Delay(TimeSpan.FromMilliseconds(-1), token);
|
||||
}
|
||||
|
||||
var keyManager = HostContext.GetService<IRSAKeyManager>();
|
||||
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
|
||||
|
||||
var migratedClientCredential = new VssOAuthJwtBearerClientCredential(clientId, migratedAuthorizationUrl, signingCredentials);
|
||||
var migratedRunnerCredential = new VssOAuthCredential(new Uri(migratedAuthorizationUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, migratedClientCredential);
|
||||
|
||||
Trace.Info("Try connect service with Token Service OAuth endpoint.");
|
||||
var runnerServer = HostContext.CreateService<IRunnerServer>();
|
||||
await runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedRunnerCredential);
|
||||
await runnerServer.GetAgentPoolsAsync();
|
||||
Trace.Info($"Successfully connected service with new authorization url.");
|
||||
|
||||
var migratedCredData = new CredentialData
|
||||
{
|
||||
Scheme = Constants.Configuration.OAuth,
|
||||
Data =
|
||||
{
|
||||
{ "clientId", clientId },
|
||||
{ "authorizationUrl", migratedAuthorizationUrl },
|
||||
{ "oauthEndpointUrl", migratedAuthorizationUrl },
|
||||
},
|
||||
};
|
||||
|
||||
_configStore.SaveMigratedCredential(migratedCredData);
|
||||
return migratedRunnerCredential;
|
||||
}
|
||||
else
|
||||
{
|
||||
Trace.Verbose("No authorization url updates");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Trace.Error("Fail to get/test new authorization url.");
|
||||
Trace.Error(ex);
|
||||
|
||||
try
|
||||
{
|
||||
await _runnerServer.ReportRunnerAuthUrlErrorAsync(_settings.PoolId, _settings.AgentId, ex.ToString());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
// best effort
|
||||
Trace.Error("Fail to report the migration error");
|
||||
Trace.Error(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ namespace GitHub.Runner.Listener
|
||||
[ServiceLocator(Default = typeof(SelfUpdater))]
|
||||
public interface ISelfUpdater : IRunnerService
|
||||
{
|
||||
bool Busy { get; }
|
||||
Task<bool> SelfUpdate(AgentRefreshMessage updateMessage, IJobDispatcher jobDispatcher, bool restartInteractiveRunner, CancellationToken token);
|
||||
}
|
||||
|
||||
@@ -31,6 +32,8 @@ namespace GitHub.Runner.Listener
|
||||
private int _poolId;
|
||||
private int _agentId;
|
||||
|
||||
public bool Busy { get; private set; }
|
||||
|
||||
public override void Initialize(IHostContext hostContext)
|
||||
{
|
||||
base.Initialize(hostContext);
|
||||
@@ -45,52 +48,60 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
public async Task<bool> SelfUpdate(AgentRefreshMessage updateMessage, IJobDispatcher jobDispatcher, bool restartInteractiveRunner, CancellationToken token)
|
||||
{
|
||||
if (!await UpdateNeeded(updateMessage.TargetVersion, token))
|
||||
Busy = true;
|
||||
try
|
||||
{
|
||||
Trace.Info($"Can't find available update package.");
|
||||
return false;
|
||||
}
|
||||
if (!await UpdateNeeded(updateMessage.TargetVersion, token))
|
||||
{
|
||||
Trace.Info($"Can't find available update package.");
|
||||
return false;
|
||||
}
|
||||
|
||||
Trace.Info($"An update is available.");
|
||||
Trace.Info($"An update is available.");
|
||||
|
||||
// Print console line that warn user not shutdown runner.
|
||||
await UpdateRunnerUpdateStateAsync("Runner update in progress, do not shutdown runner.");
|
||||
await UpdateRunnerUpdateStateAsync($"Downloading {_targetPackage.Version} runner");
|
||||
// Print console line that warn user not shutdown runner.
|
||||
await UpdateRunnerUpdateStateAsync("Runner update in progress, do not shutdown runner.");
|
||||
await UpdateRunnerUpdateStateAsync($"Downloading {_targetPackage.Version} runner");
|
||||
|
||||
await DownloadLatestRunner(token);
|
||||
Trace.Info($"Download latest runner and unzip into runner root.");
|
||||
await DownloadLatestRunner(token);
|
||||
Trace.Info($"Download latest runner and unzip into runner root.");
|
||||
|
||||
// wait till all running job finish
|
||||
await UpdateRunnerUpdateStateAsync("Waiting for current job finish running.");
|
||||
// wait till all running job finish
|
||||
await UpdateRunnerUpdateStateAsync("Waiting for current job finish running.");
|
||||
|
||||
await jobDispatcher.WaitAsync(token);
|
||||
Trace.Info($"All running job has exited.");
|
||||
await jobDispatcher.WaitAsync(token);
|
||||
Trace.Info($"All running job has exited.");
|
||||
|
||||
// delete runner backup
|
||||
DeletePreviousVersionRunnerBackup(token);
|
||||
Trace.Info($"Delete old version runner backup.");
|
||||
// delete runner backup
|
||||
DeletePreviousVersionRunnerBackup(token);
|
||||
Trace.Info($"Delete old version runner backup.");
|
||||
|
||||
// generate update script from template
|
||||
await UpdateRunnerUpdateStateAsync("Generate and execute update script.");
|
||||
// generate update script from template
|
||||
await UpdateRunnerUpdateStateAsync("Generate and execute update script.");
|
||||
|
||||
string updateScript = GenerateUpdateScript(restartInteractiveRunner);
|
||||
Trace.Info($"Generate update script into: {updateScript}");
|
||||
string updateScript = GenerateUpdateScript(restartInteractiveRunner);
|
||||
Trace.Info($"Generate update script into: {updateScript}");
|
||||
|
||||
// kick off update script
|
||||
Process invokeScript = new Process();
|
||||
// kick off update script
|
||||
Process invokeScript = new Process();
|
||||
#if OS_WINDOWS
|
||||
invokeScript.StartInfo.FileName = WhichUtil.Which("cmd.exe", trace: Trace);
|
||||
invokeScript.StartInfo.Arguments = $"/c \"{updateScript}\"";
|
||||
#elif (OS_OSX || OS_LINUX)
|
||||
invokeScript.StartInfo.FileName = WhichUtil.Which("bash", trace: Trace);
|
||||
invokeScript.StartInfo.Arguments = $"\"{updateScript}\"";
|
||||
invokeScript.StartInfo.FileName = WhichUtil.Which("bash", trace: Trace);
|
||||
invokeScript.StartInfo.Arguments = $"\"{updateScript}\"";
|
||||
#endif
|
||||
invokeScript.Start();
|
||||
Trace.Info($"Update script start running");
|
||||
invokeScript.Start();
|
||||
Trace.Info($"Update script start running");
|
||||
|
||||
await UpdateRunnerUpdateStateAsync("Runner will exit shortly for update, should back online within 10 seconds.");
|
||||
await UpdateRunnerUpdateStateAsync("Runner will exit shortly for update, should back online within 10 seconds.");
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
Busy = false;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<bool> UpdateNeeded(string targetVersion, CancellationToken token)
|
||||
|
||||
@@ -779,5 +779,65 @@ namespace GitHub.DistributedTask.WebApi
|
||||
userState: userState,
|
||||
cancellationToken: cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// [Preview API]
|
||||
/// </summary>
|
||||
/// <param name="poolId"></param>
|
||||
/// <param name="agentId"></param>
|
||||
/// <param name="userState"></param>
|
||||
/// <param name="cancellationToken">The cancellation token to cancel operation.</param>
|
||||
public Task<String> GetAgentAuthUrlAsync(
|
||||
int poolId,
|
||||
int agentId,
|
||||
object userState = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
HttpMethod httpMethod = new HttpMethod("GET");
|
||||
Guid locationId = new Guid("a82a119c-1e46-44b6-8d75-c82a79cf975b");
|
||||
object routeValues = new { poolId = poolId, agentId = agentId };
|
||||
|
||||
return SendAsync<String>(
|
||||
httpMethod,
|
||||
locationId,
|
||||
routeValues: routeValues,
|
||||
version: new ApiResourceVersion(6.0, 1),
|
||||
userState: userState,
|
||||
cancellationToken: cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// [Preview API]
|
||||
/// </summary>
|
||||
/// <param name="poolId"></param>
|
||||
/// <param name="agentId"></param>
|
||||
/// <param name="error"></param>
|
||||
/// <param name="userState"></param>
|
||||
/// <param name="cancellationToken">The cancellation token to cancel operation.</param>
|
||||
[EditorBrowsable(EditorBrowsableState.Never)]
|
||||
public virtual async Task ReportAgentAuthUrlMigrationErrorAsync(
|
||||
int poolId,
|
||||
int agentId,
|
||||
string error,
|
||||
object userState = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
HttpMethod httpMethod = new HttpMethod("POST");
|
||||
Guid locationId = new Guid("a82a119c-1e46-44b6-8d75-c82a79cf975b");
|
||||
object routeValues = new { poolId = poolId, agentId = agentId };
|
||||
HttpContent content = new ObjectContent<string>(error, new VssJsonMediaTypeFormatter(true));
|
||||
|
||||
using (HttpResponseMessage response = await SendAsync(
|
||||
httpMethod,
|
||||
locationId,
|
||||
routeValues: routeValues,
|
||||
version: new ApiResourceVersion(6.0, 1),
|
||||
userState: userState,
|
||||
cancellationToken: cancellationToken,
|
||||
content: content).ConfigureAwait(false))
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -260,5 +260,8 @@ namespace GitHub.DistributedTask.WebApi
|
||||
public static readonly Guid CheckpointResourcesLocationId = new Guid(CheckpointResourcesLocationIdString);
|
||||
public const String CheckpointResourcesResource = "references";
|
||||
|
||||
public static readonly Guid RunnerAuthUrl = new Guid("{A82A119C-1E46-44B6-8D75-C82A79CF975B}");
|
||||
public const string RunnerAuthUrlResource = "authurl";
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -198,7 +198,7 @@ namespace GitHub.Runner.Common.Tests
|
||||
|
||||
case WellKnownDirectory.Tools:
|
||||
path = Environment.GetEnvironmentVariable("RUNNER_TOOL_CACHE");
|
||||
|
||||
|
||||
if (string.IsNullOrEmpty(path))
|
||||
{
|
||||
path = Path.Combine(
|
||||
|
||||
Reference in New Issue
Block a user