diff --git a/src/Runner.Common/JobServerQueue.cs b/src/Runner.Common/JobServerQueue.cs index 6234b4bdd..e6a00f1c8 100644 --- a/src/Runner.Common/JobServerQueue.cs +++ b/src/Runner.Common/JobServerQueue.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using System.Threading; @@ -14,10 +15,11 @@ namespace GitHub.Runner.Common [ServiceLocator(Default = typeof(JobServerQueue))] public interface IJobServerQueue : IRunnerService, IThrottlingReporter { + IList JobTelemetries { get; } TaskCompletionSource JobRecordUpdated { get; } event EventHandler JobServerQueueThrottling; Task ShutdownAsync(); - void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false); + void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false, bool enableTelemetry = false); void QueueWebConsoleLine(Guid stepRecordId, string line, long? lineNumber = null); void QueueFileUpload(Guid timelineId, Guid timelineRecordId, string type, string name, string path, bool deleteSource); void QueueResultsUpload(Guid timelineRecordId, string name, string path, string type, bool deleteSource, bool finalize, bool firstBlock, long totalLines); @@ -69,13 +71,18 @@ namespace GitHub.Runner.Common private Task[] _allDequeueTasks; private readonly TaskCompletionSource _jobCompletionSource = new(); private readonly TaskCompletionSource _jobRecordUpdated = new(); + private readonly List _jobTelemetries = new(); private bool _queueInProcess = false; private bool _resultsServiceOnly = false; + private Stopwatch _resultsUploadTimer = new(); + private Stopwatch _actionsUploadTimer = new(); public TaskCompletionSource JobRecordUpdated => _jobRecordUpdated; public event EventHandler JobServerQueueThrottling; + public IList JobTelemetries => _jobTelemetries; + // Web console dequeue will start with process queue every 250ms for the first 60*4 times (~60 seconds). // Then the dequeue will happen every 500ms. // In this way, customer still can get instance live console output on job start, @@ -87,6 +94,7 @@ namespace GitHub.Runner.Common private bool _firstConsoleOutputs = true; private bool _resultsClientInitiated = false; + private bool _enableTelemetry = false; private delegate Task ResultsFileUploadHandler(ResultsUploadFileInfo file); public override void Initialize(IHostContext hostContext) @@ -96,10 +104,11 @@ namespace GitHub.Runner.Common _resultsServer = hostContext.GetService(); } - public void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false) + public void Start(Pipelines.AgentJobRequestMessage jobRequest, bool resultsServiceOnly = false, bool enableTelemetry = false) { Trace.Entering(); _resultsServiceOnly = resultsServiceOnly; + _enableTelemetry = enableTelemetry; var serviceEndPoint = jobRequest.Resources.Endpoints.Single(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase)); @@ -211,6 +220,12 @@ namespace GitHub.Runner.Common await _resultsServer.DisposeAsync(); Trace.Info("All queue process tasks have been stopped, and all queues are drained."); + if (_enableTelemetry) + { + var uploadTimeComparison = $"Actions upload time: {_actionsUploadTimer.ElapsedMilliseconds} ms, Result upload time: {_resultsUploadTimer.ElapsedMilliseconds} ms"; + Trace.Info(uploadTimeComparison); + _jobTelemetries.Add(new JobTelemetry() { Type = JobTelemetryType.General, Message = uploadTimeComparison }); + } } public void QueueWebConsoleLine(Guid stepRecordId, string line, long? lineNumber) @@ -456,6 +471,10 @@ namespace GitHub.Runner.Common { try { + if (_enableTelemetry) + { + _actionsUploadTimer.Start(); + } await UploadFile(file); } catch (Exception ex) @@ -471,6 +490,13 @@ namespace GitHub.Runner.Common // _fileUploadQueue.Enqueue(file); //} } + finally + { + if (_enableTelemetry) + { + _actionsUploadTimer.Stop(); + } + } } Trace.Info("Try to upload {0} log files or attachments, success rate: {1}/{0}.", filesToUpload.Count, filesToUpload.Count - errorCount); @@ -517,6 +543,10 @@ namespace GitHub.Runner.Common { try { + if (_enableTelemetry) + { + _resultsUploadTimer.Start(); + } if (String.Equals(file.Type, ChecksAttachmentType.StepSummary, StringComparison.OrdinalIgnoreCase)) { await UploadSummaryFile(file); @@ -548,6 +578,13 @@ namespace GitHub.Runner.Common SendResultsTelemetry(ex); } } + finally + { + if (_enableTelemetry) + { + _resultsUploadTimer.Stop(); + } + } } Trace.Info("Tried to upload {0} file(s) to results, success rate: {1}/{0}.", filesToUpload.Count, filesToUpload.Count - errorCount); diff --git a/src/Runner.Worker/JobRunner.cs b/src/Runner.Worker/JobRunner.cs index aa6580402..4e2c4e95a 100644 --- a/src/Runner.Worker/JobRunner.cs +++ b/src/Runner.Worker/JobRunner.cs @@ -51,6 +51,13 @@ namespace GitHub.Runner.Worker HostContext.UserAgents.Add(new ProductInfoHeaderValue("OrchestrationId", orchestrationId.Value)); } + var jobServerQueueTelemetry = false; + if (message.Variables.TryGetValue("DistributedTask.EnableJobServerQueueTelemetry", out VariableValue enableJobServerQueueTelemetry) && + !string.IsNullOrEmpty(enableJobServerQueueTelemetry?.Value)) + { + jobServerQueueTelemetry = StringUtil.ConvertToBoolean(enableJobServerQueueTelemetry.Value); + } + ServiceEndpoint systemConnection = message.Resources.Endpoints.Single(x => string.Equals(x.Name, WellKnownServiceEndpointNames.SystemVssConnection, StringComparison.OrdinalIgnoreCase)); if (MessageUtil.IsRunServiceJob(message.MessageType)) { @@ -72,7 +79,7 @@ namespace GitHub.Runner.Worker launchServer.InitializeLaunchClient(new Uri(launchReceiverEndpoint), accessToken); } _jobServerQueue = HostContext.GetService(); - _jobServerQueue.Start(message, resultsServiceOnly: true); + _jobServerQueue.Start(message, resultsServiceOnly: true, enableTelemetry: jobServerQueueTelemetry); } else { @@ -94,7 +101,7 @@ namespace GitHub.Runner.Worker VssConnection jobConnection = VssUtil.CreateConnection(jobServerUrl, jobServerCredential, delegatingHandlers); await jobServer.ConnectAsync(jobConnection); - _jobServerQueue.Start(message); + _jobServerQueue.Start(message, enableTelemetry: jobServerQueueTelemetry); server = jobServer; } @@ -405,6 +412,13 @@ namespace GitHub.Runner.Worker result = TaskResultUtil.MergeTaskResults(result, TaskResult.Failed); } + // include any job telemetry from the background upload process. + if (_jobServerQueue != null && + _jobServerQueue.JobTelemetries.Count > 0) + { + jobContext.Global.JobTelemetry.AddRange(_jobServerQueue.JobTelemetries); + } + // Clean TEMP after finish process jobserverqueue, since there might be a pending fileupload still use the TEMP dir. _tempDirectoryManager?.CleanupTempDirectory(); diff --git a/src/Sdk/DTWebApi/WebApi/JobTelemetryType.cs b/src/Sdk/DTWebApi/WebApi/JobTelemetryType.cs index 88c14c695..2228a9e5e 100644 --- a/src/Sdk/DTWebApi/WebApi/JobTelemetryType.cs +++ b/src/Sdk/DTWebApi/WebApi/JobTelemetryType.cs @@ -2,6 +2,7 @@ namespace GitHub.DistributedTask.WebApi { + // do NOT add new enum since it will break backward compatibility with GHES public enum JobTelemetryType { [EnumMember]