Compare commits

..

1 Commits

Author SHA1 Message Date
eric sciple
7731bf9ad5 adding compile-time const whether to use the broker service 2022-04-08 19:47:44 +00:00
12 changed files with 210 additions and 146 deletions

View File

@@ -47,6 +47,11 @@
<DefineConstants>$(DefineConstants);DEBUG</DefineConstants>
</PropertyGroup>
<!-- Set USE_BROKER vars -->
<PropertyGroup Condition="'$(USE_BROKER)' == 'true'">
<DefineConstants>$(DefineConstants);USE_BROKER</DefineConstants>
</PropertyGroup>
<!-- Set Treat tarnings as errors -->
<PropertyGroup>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>

View File

@@ -0,0 +1,38 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using GitHub.Runner.Common.Util;
using GitHub.Services.WebApi;
using GitHub.Services.Common;
using GitHub.Runner.Sdk;
using System.Net.Http;
namespace GitHub.Runner.Common
{
[ServiceLocator(Default = typeof(BrokerServer))]
public interface IBrokerServer : IRunnerService
{
Task ConnectAsync(Uri serverUrl);
Task<GitHub.DistributedTask.WebApi.TaskAgentMessage> GetMessageAsync(Int32 poolId, Guid sessionId, Int64? lastMessageId, CancellationToken cancellationToken);
}
public sealed class BrokerServer : RunnerService, IBrokerServer
{
private HttpClient _httpClient;
public async Task ConnectAsync(Uri serverUrl)
{
_httpClient = new HttpClient();
_httpClient.BaseAddress = serverUrl;
_httpClient.Timeout = TimeSpan.FromSeconds(50);
await _httpClient.GetAsync("health");
}
public async Task<GitHub.DistributedTask.WebApi.TaskAgentMessage> GetMessageAsync(Int32 poolId, Guid sessionId, Int64? lastMessageId, CancellationToken cancellationToken)
{
await _httpClient.GetAsync("message");
return null;
}
}
}

View File

@@ -1,76 +0,0 @@
using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
using System.Threading;
using System.Threading.Tasks;
using GitHub.DistributedTask.Pipelines;
using GitHub.DistributedTask.WebApi;
using GitHub.Runner.Common.Util;
using GitHub.Runner.Sdk;
using GitHub.Services.Common;
using GitHub.Services.WebApi;
namespace GitHub.Runner.Common
{
[ServiceLocator(Default = typeof(RunServer))]
public interface IRunServer : IRunnerService
{
Task ConnectAsync(Uri serverUrl, VssCredentials credentials);
Task<AgentJobRequestMessage> GetJobMessageAsync(string id);
}
public sealed class RunServer : RunnerService, IRunServer
{
private bool _hasConnection;
private VssConnection _connection;
private TaskAgentHttpClient _taskAgentClient;
public async Task ConnectAsync(Uri serverUrl, VssCredentials credentials)
{
_connection = await EstablishVssConnection(serverUrl, credentials, TimeSpan.FromSeconds(100));
_taskAgentClient = _connection.GetClient<TaskAgentHttpClient>();
_hasConnection = true;
}
private async Task<VssConnection> EstablishVssConnection(Uri serverUrl, VssCredentials credentials, TimeSpan timeout)
{
Trace.Info($"EstablishVssConnection");
Trace.Info($"Establish connection with {timeout.TotalSeconds} seconds timeout.");
int attemptCount = 5;
while (attemptCount-- > 0)
{
var connection = VssUtil.CreateConnection(serverUrl, credentials, timeout: timeout);
try
{
await connection.ConnectAsync();
return connection;
}
catch (Exception ex) when (attemptCount > 0)
{
Trace.Info($"Catch exception during connect. {attemptCount} attempt left.");
Trace.Error(ex);
await HostContext.Delay(TimeSpan.FromMilliseconds(100), CancellationToken.None);
}
}
// should never reach here.
throw new InvalidOperationException(nameof(EstablishVssConnection));
}
private void CheckConnection()
{
if (!_hasConnection)
{
throw new InvalidOperationException($"SetConnection");
}
}
public Task<AgentJobRequestMessage> GetJobMessageAsync(string id)
{
CheckConnection();
return _taskAgentClient.GetJobMessageAsync(id);
}
}
}

View File

@@ -40,6 +40,12 @@ namespace GitHub.Runner.Listener.Configuration
return creds;
}
#if USE_BROKER
public VssCredentials LoadCredentials()
{
return new VssCredentials();
}
#else
public VssCredentials LoadCredentials()
{
IConfigurationStore store = HostContext.GetService<IConfigurationStore>();
@@ -69,6 +75,7 @@ namespace GitHub.Runner.Listener.Configuration
return creds;
}
#endif
}
[DataContract]

View File

@@ -31,6 +31,7 @@ namespace GitHub.Runner.Listener
private RunnerSettings _settings;
private ITerminal _term;
private IRunnerServer _runnerServer;
private IBrokerServer _brokerServer;
private TaskAgentSession _session;
private TimeSpan _getNextMessageRetryInterval;
private bool _accessTokenRevoked = false;
@@ -45,8 +46,44 @@ namespace GitHub.Runner.Listener
_term = HostContext.GetService<ITerminal>();
_runnerServer = HostContext.GetService<IRunnerServer>();
_brokerServer = HostContext.GetService<IBrokerServer>();
}
#if USE_BROKER
public async Task<Boolean> CreateSessionAsync(CancellationToken token)
{
Trace.Entering();
// Settings
var configManager = HostContext.GetService<IConfigurationManager>();
_settings = configManager.LoadSettings();
var serverUrl = _settings.ServerUrl;
Trace.Info(_settings);
// Connect
token.ThrowIfCancellationRequested();
Trace.Info($"Attempt to create session.");
Trace.Info("Connecting to the Runner Server...");
_term.WriteLine($"Connecting to {new Uri(serverUrl)}");
await _brokerServer.ConnectAsync(new Uri(serverUrl));
_term.WriteLine();
_term.WriteSuccessMessage("Connected to GitHub");
_term.WriteLine();
// Session info
var agent = new TaskAgentReference
{
Id = _settings.AgentId,
Name = _settings.AgentName,
Version = BuildConstants.RunnerPackage.Version,
OSDescription = RuntimeInformation.OSDescription,
};
string sessionName = $"{Environment.MachineName ?? "RUNNER"}";
_session = new TaskAgentSession(sessionName, agent);
return true;
}
#else
public async Task<Boolean> CreateSessionAsync(CancellationToken token)
{
Trace.Entering();
@@ -151,6 +188,7 @@ namespace GitHub.Runner.Listener
}
}
}
#endif
public async Task DeleteSessionAsync()
{
@@ -170,6 +208,116 @@ namespace GitHub.Runner.Listener
}
}
#if USE_BROKER
public async Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
{
Trace.Entering();
ArgUtil.NotNull(_session, nameof(_session));
ArgUtil.NotNull(_settings, nameof(_settings));
bool encounteringError = false;
int continuousError = 0;
string errorMessage = string.Empty;
Stopwatch heartbeat = new Stopwatch();
heartbeat.Restart();
while (true)
{
token.ThrowIfCancellationRequested();
TaskAgentMessage message = null;
try
{
message = await _brokerServer.GetMessageAsync(_settings.PoolId, _session.SessionId, _lastMessageId, token);
// Decrypt the message body if the session is using encryption
message = DecryptMessage(message);
if (message != null)
{
_lastMessageId = message.MessageId;
}
if (encounteringError) //print the message once only if there was an error
{
_term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
encounteringError = false;
continuousError = 0;
}
}
catch (OperationCanceledException) when (token.IsCancellationRequested)
{
Trace.Info("Get next message has been cancelled.");
throw;
}
catch (TaskAgentAccessTokenExpiredException)
{
Trace.Info("Runner OAuth token has been revoked. Unable to pull message.");
_accessTokenRevoked = true;
throw;
}
catch (Exception ex)
{
Trace.Error("Catch exception during get next message.");
Trace.Error(ex);
// don't retry if SkipSessionRecover = true, DT service will delete agent session to stop agent from taking more jobs.
if (ex is TaskAgentSessionExpiredException && !_settings.SkipSessionRecover && await CreateSessionAsync(token))
{
Trace.Info($"{nameof(TaskAgentSessionExpiredException)} received, recovered by recreate session.");
}
else if (!IsGetNextMessageExceptionRetriable(ex))
{
throw;
}
else
{
continuousError++;
//retry after a random backoff to avoid service throttling
//in case of there is a service error happened and all agents get kicked off of the long poll and all agent try to reconnect back at the same time.
if (continuousError <= 5)
{
// random backoff [15, 30]
_getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(15), TimeSpan.FromSeconds(30), _getNextMessageRetryInterval);
}
else
{
// more aggressive backoff [30, 60]
_getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(30), TimeSpan.FromSeconds(60), _getNextMessageRetryInterval);
}
if (!encounteringError)
{
//print error only on the first consecutive error
_term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
encounteringError = true;
}
// re-create VssConnection before next retry
await _runnerServer.RefreshConnectionAsync(RunnerConnectionType.MessageQueue, TimeSpan.FromSeconds(60));
Trace.Info("Sleeping for {0} seconds before retrying.", _getNextMessageRetryInterval.TotalSeconds);
await HostContext.Delay(_getNextMessageRetryInterval, token);
}
}
if (message == null)
{
if (heartbeat.Elapsed > TimeSpan.FromMinutes(30))
{
Trace.Info($"No message retrieved from session '{_session.SessionId}' within last 30 minutes.");
heartbeat.Restart();
}
else
{
Trace.Verbose($"No message retrieved from session '{_session.SessionId}'.");
}
continue;
}
Trace.Info($"Message '{message.MessageId}' received from session '{_session.SessionId}'.");
return message;
}
}
#else
public async Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
{
Trace.Entering();
@@ -281,6 +429,7 @@ namespace GitHub.Runner.Listener
return message;
}
}
#endif
public async Task DeleteMessageAsync(TaskAgentMessage message)
{

View File

@@ -13,10 +13,6 @@ using GitHub.Runner.Sdk;
using System.Linq;
using GitHub.Runner.Listener.Check;
using System.Collections.Generic;
using System.Runtime.Serialization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
namespace GitHub.Runner.Listener
{
@@ -461,35 +457,6 @@ namespace GitHub.Runner.Listener
}
}
}
// Broker flow
else if (string.Equals(message.MessageType, JobRequestMessageTypes.RunnerJobRequest, StringComparison.OrdinalIgnoreCase))
{
if (autoUpdateInProgress || runOnceJobReceived)
{
skipMessageDeletion = true;
Trace.Info($"Skip message deletion for job request message '{message.MessageId}'.");
}
else
{
var messageRef = StringUtil.ConvertFromJson<RunnerJobRequestRef>(message.Body);
// Create connection
var credMgr = HostContext.GetService<ICredentialManager>();
var creds = credMgr.LoadCredentials();
// todo: add retries
var runServer = HostContext.CreateService<IRunServer>();
await runServer.ConnectAsync(new Uri(settings.ServerUrl), creds);
var jobMessage = await runServer.GetJobMessageAsync(messageRef.RunnerRequestId);
jobDispatcher.Run(jobMessage, runOnce);
if (runOnce)
{
Trace.Info("One time used runner received job message.");
runOnceJobReceived = true;
}
}
}
else if (string.Equals(message.MessageType, JobCancelMessage.MessageType, StringComparison.OrdinalIgnoreCase))
{
var cancelJobMessage = JsonUtility.FromString<JobCancelMessage>(message.Body);

View File

@@ -1,13 +0,0 @@
using System.Runtime.Serialization;
namespace GitHub.Runner.Listener
{
[DataContract]
public sealed class RunnerJobRequestRef
{
[DataMember(Name = "id")]
public string Id { get; set; }
[DataMember(Name = "runner_request_id")]
public string RunnerRequestId { get; set; }
}
}

View File

@@ -57,7 +57,6 @@ namespace GitHub.Runner.Sdk
settings.SendTimeout = TimeSpan.FromSeconds(Math.Min(Math.Max(httpRequestTimeoutSeconds, 100), 1200));
}
settings.AllowAutoRedirect = true;
// Remove Invariant from the list of accepted languages.
//

View File

@@ -27,7 +27,6 @@ using System.Net.Http.Headers;
using System.Net.Http.Formatting;
using System.Threading;
using System.Threading.Tasks;
using GitHub.DistributedTask.Pipelines;
using GitHub.Services.Common;
using GitHub.Services.WebApi;

View File

@@ -5,6 +5,5 @@ namespace GitHub.DistributedTask.WebApi
public static class JobRequestMessageTypes
{
public const String PipelineAgentJobRequest = "PipelineAgentJobRequest";
public const String RunnerJobRequest = "RunnerJobRequest";
}
}

View File

@@ -141,24 +141,6 @@ namespace GitHub.DistributedTask.WebApi
return ReplaceAgentAsync(poolId, agent.Id, agent, userState, cancellationToken);
}
public Task<Pipelines.AgentJobRequestMessage> GetJobMessageAsync(
string messageId,
object userState = null,
CancellationToken cancellationToken = default)
{
HttpMethod httpMethod = new HttpMethod("GET");
Guid locationId = new Guid("25adab70-1379-4186-be8e-b643061ebe3a");
object routeValues = new { messageId = messageId };
return SendAsync<Pipelines.AgentJobRequestMessage>(
httpMethod,
locationId,
routeValues: routeValues,
version: new ApiResourceVersion(1.0, 1),
userState: userState,
cancellationToken: cancellationToken);
}
protected Task<T> SendAsync<T>(
HttpMethod method,
Guid locationId,

View File

@@ -2,7 +2,7 @@
###############################################################################
#
# ./dev.sh build/layout/test/package [Debug/Release]
# ./dev.sh build/layout/test/package [Debug/Release] [linux-x64|linux-x86|linux-arm64|linux-arm|osx-x64|win-x64|win-x86] [use-broker]
#
###############################################################################
@@ -11,6 +11,7 @@ set -e
DEV_CMD=$1
DEV_CONFIG=$2
DEV_TARGET_RUNTIME=$3
DEV_USE_BROKER=$4
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LAYOUT_DIR="$SCRIPT_DIR/../_layout"
@@ -81,6 +82,13 @@ elif [[ "$CURRENT_PLATFORM" == 'darwin' ]]; then
fi
fi
if [ -n "$DEV_USE_BROKER" ]; then
USE_BROKER='-p:USE_BROKER="true"'
else
USE_BROKER=''
fi
function failed()
{
local error=${1:-Undefined error}
@@ -114,13 +122,13 @@ function heading()
function build ()
{
heading "Building ..."
dotnet msbuild -t:Build -p:PackageRuntime="${RUNTIME_ID}" -p:BUILDCONFIG="${BUILD_CONFIG}" -p:RunnerVersion="${RUNNER_VERSION}" ./dir.proj || failed build
dotnet msbuild -t:Build -p:PackageRuntime="${RUNTIME_ID}" -p:BUILDCONFIG="${BUILD_CONFIG}" -p:RunnerVersion="${RUNNER_VERSION}" $USE_BROKER ./dir.proj || failed build
}
function layout ()
{
heading "Create layout ..."
dotnet msbuild -t:layout -p:PackageRuntime="${RUNTIME_ID}" -p:BUILDCONFIG="${BUILD_CONFIG}" -p:RunnerVersion="${RUNNER_VERSION}" ./dir.proj || failed build
dotnet msbuild -t:layout -p:PackageRuntime="${RUNTIME_ID}" -p:BUILDCONFIG="${BUILD_CONFIG}" -p:RunnerVersion="${RUNNER_VERSION}" $USE_BROKER ./dir.proj || failed build
#change execution flag to allow running with sudo
if [[ ("$CURRENT_PLATFORM" == "linux") || ("$CURRENT_PLATFORM" == "darwin") ]]; then