Recreate VssConnection on retry (#1316)

This commit is contained in:
Julio Barba
2021-09-09 19:09:17 -04:00
committed by GitHub
parent aab936d081
commit 4359dd605b
3 changed files with 23 additions and 12 deletions

View File

@@ -2,16 +2,19 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using GitHub.Runner.Sdk;
using GitHub.Services.WebApi;
using GitHub.Services.Common;
namespace GitHub.Runner.Common
{
[ServiceLocator(Default = typeof(JobServer))]
public interface IJobServer : IRunnerService
{
Task ConnectAsync(VssConnection jobConnection);
Task ConnectAsync(Uri jobServerUrl, VssCredentials jobServerCredential, DelegatingHandler[] delegatingHandler = null);
// logging and console
Task<TaskLog> AppendLogContentAsync(Guid scopeIdentifier, string hubName, Guid planId, int logId, Stream uploadStream, CancellationToken cancellationToken);
@@ -32,20 +35,21 @@ namespace GitHub.Runner.Common
private VssConnection _connection;
private TaskHttpClient _taskClient;
public async Task ConnectAsync(VssConnection jobConnection)
public async Task ConnectAsync(Uri jobServerUrl, VssCredentials jobServerCredential, DelegatingHandler[] delegatingHandler = null)
{
_connection = jobConnection;
Trace.Info($"Establishing connection for JobServer");
int attemptCount = 5;
while (!_connection.HasAuthenticated && attemptCount-- > 0)
while (attemptCount-- > 0)
{
try
{
await _connection.ConnectAsync();
await RefreshConnectionAsync(jobServerUrl, jobServerCredential, delegatingHandler);
break;
}
catch (Exception ex) when (attemptCount > 0)
{
Trace.Info($"Catch exception during connect. {attemptCount} attemp left.");
Trace.Info($"Catch exception during connect. {attemptCount} attempts left.");
Trace.Error(ex);
}
@@ -53,6 +57,15 @@ namespace GitHub.Runner.Common
}
_taskClient = _connection.GetClient<TaskHttpClient>();
}
private async Task RefreshConnectionAsync(Uri jobServerUrl, VssCredentials jobServerCredential, DelegatingHandler[] delegatingHandler)
{
Trace.Info($"Refresh JobServer VssConnection to get on a different AFD node.");
_hasConnection = false;
_connection?.Dispose();
_connection = VssUtil.CreateConnection(jobServerUrl, jobServerCredential, delegatingHandler);
await _connection.ConnectAsync();
_hasConnection = true;
}

View File

@@ -510,9 +510,8 @@ namespace GitHub.Runner.Listener
var jobServer = HostContext.GetService<IJobServer>();
VssCredentials jobServerCredential = VssUtil.GetVssCredential(systemConnection);
VssConnection jobConnection = VssUtil.CreateConnection(systemConnection.Url, jobServerCredential);
await jobServer.ConnectAsync(jobConnection);
await jobServer.ConnectAsync(systemConnection.Url, jobServerCredential);
await LogWorkerProcessUnhandledException(jobServer, message, detailInfo);
// Go ahead to finish the job with result 'Failed' if the STDERR from worker is System.IO.IOException, since it typically means we are running out of disk space.
@@ -791,9 +790,8 @@ namespace GitHub.Runner.Listener
var jobServer = HostContext.GetService<IJobServer>();
VssCredentials jobServerCredential = VssUtil.GetVssCredential(systemConnection);
VssConnection jobConnection = VssUtil.CreateConnection(systemConnection.Url, jobServerCredential);
await jobServer.ConnectAsync(jobConnection);
await jobServer.ConnectAsync(systemConnection.Url, jobServerCredential);
var timeline = await jobServer.GetTimelineAsync(message.Plan.ScopeIdentifier, message.Plan.PlanType, message.Plan.PlanId, message.Timeline.Id, CancellationToken.None);

View File

@@ -48,8 +48,8 @@ namespace GitHub.Runner.Worker
Trace.Info($"Creating job server with URL: {jobServerUrl}");
// jobServerQueue is the throttling reporter.
_jobServerQueue = HostContext.GetService<IJobServerQueue>();
VssConnection jobConnection = VssUtil.CreateConnection(jobServerUrl, jobServerCredential, new DelegatingHandler[] { new ThrottlingReportHandler(_jobServerQueue) });
await jobServer.ConnectAsync(jobConnection);
await jobServer.ConnectAsync(jobServerUrl, jobServerCredential, new DelegatingHandler[] { new ThrottlingReportHandler(_jobServerQueue) });
_jobServerQueue.Start(message);
HostContext.WritePerfCounter($"WorkerJobServerQueueStarted_{message.RequestId.ToString()}");