Collect telemetry to measure upload speed for different backend. (#2912)

This commit is contained in:
Tingluo Huang
2023-10-05 23:29:38 -04:00
committed by GitHub
parent e25c754744
commit f672567acc
3 changed files with 56 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
@@ -14,10 +15,11 @@ namespace GitHub.Runner.Common
[ServiceLocator(Default = typeof(JobServerQueue))]
public interface IJobServerQueue : IRunnerService, IThrottlingReporter
{
IList<JobTelemetry> JobTelemetries { get; }
TaskCompletionSource<int> JobRecordUpdated { get; }
event EventHandler<ThrottlingEventArgs> JobServerQueueThrottling;
Task ShutdownAsync();
void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false);
void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false, bool enableTelemetry = false);
void QueueWebConsoleLine(Guid stepRecordId, string line, long? lineNumber = null);
void QueueFileUpload(Guid timelineId, Guid timelineRecordId, string type, string name, string path, bool deleteSource);
void QueueResultsUpload(Guid timelineRecordId, string name, string path, string type, bool deleteSource, bool finalize, bool firstBlock, long totalLines);
@@ -69,13 +71,18 @@ namespace GitHub.Runner.Common
private Task[] _allDequeueTasks;
private readonly TaskCompletionSource<int> _jobCompletionSource = new();
private readonly TaskCompletionSource<int> _jobRecordUpdated = new();
private readonly List<JobTelemetry> _jobTelemetries = new();
private bool _queueInProcess = false;
private bool _resultsServiceOnly = false;
private Stopwatch _resultsUploadTimer = new();
private Stopwatch _actionsUploadTimer = new();
public TaskCompletionSource<int> JobRecordUpdated => _jobRecordUpdated;
public event EventHandler<ThrottlingEventArgs> JobServerQueueThrottling;
public IList<JobTelemetry> JobTelemetries => _jobTelemetries;
// Web console dequeue will start with process queue every 250ms for the first 60*4 times (~60 seconds).
// Then the dequeue will happen every 500ms.
// In this way, customer still can get instance live console output on job start,
@@ -87,6 +94,7 @@ namespace GitHub.Runner.Common
private bool _firstConsoleOutputs = true;
private bool _resultsClientInitiated = false;
private bool _enableTelemetry = false;
private delegate Task ResultsFileUploadHandler(ResultsUploadFileInfo file);
public override void Initialize(IHostContext hostContext)
@@ -96,10 +104,11 @@ namespace GitHub.Runner.Common
_resultsServer = hostContext.GetService<IResultsServer>();
}
public void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false)
public void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false, bool enableTelemetry = false)
{
Trace.Entering();
_resultsServiceOnly = resultsServiceOnly;
_enableTelemetry = enableTelemetry;
var serviceEndPoint = jobRequest.Resources.Endpoints.Single(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase));
@@ -211,6 +220,12 @@ namespace GitHub.Runner.Common
await _resultsServer.DisposeAsync();
Trace.Info("All queue process tasks have been stopped, and all queues are drained.");
if (_enableTelemetry)
{
var uploadTimeComparison = $"Actions upload time: {_actionsUploadTimer.ElapsedMilliseconds} ms, Result upload time: {_resultsUploadTimer.ElapsedMilliseconds} ms";
Trace.Info(uploadTimeComparison);
_jobTelemetries.Add(new JobTelemetry() { Type = JobTelemetryType.General, Message = uploadTimeComparison });
}
}
public void QueueWebConsoleLine(Guid stepRecordId, string line, long? lineNumber)
@@ -456,6 +471,10 @@ namespace GitHub.Runner.Common
{
try
{
if (_enableTelemetry)
{
_actionsUploadTimer.Start();
}
await UploadFile(file);
}
catch (Exception ex)
@@ -471,6 +490,13 @@ namespace GitHub.Runner.Common
// _fileUploadQueue.Enqueue(file);
//}
}
finally
{
if (_enableTelemetry)
{
_actionsUploadTimer.Stop();
}
}
}
Trace.Info("Try to upload {0} log files or attachments, success rate: {1}/{0}.", filesToUpload.Count, filesToUpload.Count - errorCount);
@@ -517,6 +543,10 @@ namespace GitHub.Runner.Common
{
try
{
if (_enableTelemetry)
{
_resultsUploadTimer.Start();
}
if (String.Equals(file.Type, ChecksAttachmentType.StepSummary, StringComparison.OrdinalIgnoreCase))
{
await UploadSummaryFile(file);
@@ -548,6 +578,13 @@ namespace GitHub.Runner.Common
SendResultsTelemetry(ex);
}
}
finally
{
if (_enableTelemetry)
{
_resultsUploadTimer.Stop();
}
}
}
Trace.Info("Tried to upload {0} file(s) to results, success rate: {1}/{0}.", filesToUpload.Count, filesToUpload.Count - errorCount);

View File

@@ -51,6 +51,13 @@ namespace GitHub.Runner.Worker
HostContext.UserAgents.Add(new ProductInfoHeaderValue("OrchestrationId", orchestrationId.Value));
}
var jobServerQueueTelemetry = false;
if (message.Variables.TryGetValue("DistributedTask.EnableJobServerQueueTelemetry", out VariableValue enableJobServerQueueTelemetry) &&
!string.IsNullOrEmpty(enableJobServerQueueTelemetry?.Value))
{
jobServerQueueTelemetry = StringUtil.ConvertToBoolean(enableJobServerQueueTelemetry.Value);
}
ServiceEndpoint systemConnection = message.Resources.Endpoints.Single(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase));
if (MessageUtil.IsRunServiceJob(message.MessageType))
{
@@ -72,7 +79,7 @@ namespace GitHub.Runner.Worker
launchServer.InitializeLaunchClient(new Uri(launchReceiverEndpoint), accessToken);
}
_jobServerQueue = HostContext.GetService<IJobServerQueue>();
_jobServerQueue.Start(message, resultsServiceOnly: true);
_jobServerQueue.Start(message, resultsServiceOnly: true, enableTelemetry: jobServerQueueTelemetry);
}
else
{
@@ -94,7 +101,7 @@ namespace GitHub.Runner.Worker
VssConnection jobConnection = VssUtil.CreateConnection(jobServerUrl, jobServerCredential, delegatingHandlers);
await jobServer.ConnectAsync(jobConnection);
_jobServerQueue.Start(message);
_jobServerQueue.Start(message, enableTelemetry: jobServerQueueTelemetry);
server = jobServer;
}
@@ -405,6 +412,13 @@ namespace GitHub.Runner.Worker
result = TaskResultUtil.MergeTaskResults(result, TaskResult.Failed);
}
// include any job telemetry from the background upload process.
if (_jobServerQueue != null &&
_jobServerQueue.JobTelemetries.Count > 0)
{
jobContext.Global.JobTelemetry.AddRange(_jobServerQueue.JobTelemetries);
}
// Clean TEMP after finish process jobserverqueue, since there might be a pending fileupload still use the TEMP dir.
_tempDirectoryManager?.CleanupTempDirectory();

View File

@@ -2,6 +2,7 @@
namespace GitHub.DistributedTask.WebApi
{
// do NOT add new enum since it will break backward compatibility with GHES
public enum JobTelemetryType
{
[EnumMember]