mirror of
https://github.com/actions/runner.git
synced 2025-12-12 15:13:30 +00:00
Add trace to help debug IPC message corruption in runner. (#1587)
* Add trace to help debug IPC message corruption in runner. * .
This commit is contained in:
@@ -2,17 +2,19 @@ using System;
|
|||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using GitHub.DistributedTask.WebApi;
|
using GitHub.DistributedTask.WebApi;
|
||||||
using GitHub.Runner.Common.Util;
|
|
||||||
using GitHub.Services.WebApi;
|
|
||||||
using Pipelines = GitHub.DistributedTask.Pipelines;
|
|
||||||
using System.Linq;
|
|
||||||
using GitHub.Services.Common;
|
|
||||||
using GitHub.Runner.Common;
|
using GitHub.Runner.Common;
|
||||||
|
using GitHub.Runner.Common.Util;
|
||||||
using GitHub.Runner.Sdk;
|
using GitHub.Runner.Sdk;
|
||||||
|
using GitHub.Services.Common;
|
||||||
|
using GitHub.Services.WebApi;
|
||||||
using GitHub.Services.WebApi.Jwt;
|
using GitHub.Services.WebApi.Jwt;
|
||||||
|
using Pipelines = GitHub.DistributedTask.Pipelines;
|
||||||
|
|
||||||
namespace GitHub.Runner.Listener
|
namespace GitHub.Runner.Listener
|
||||||
{
|
{
|
||||||
@@ -34,6 +36,7 @@ namespace GitHub.Runner.Listener
|
|||||||
// and the server will not send another job while this one is still running.
|
// and the server will not send another job while this one is still running.
|
||||||
public sealed class JobDispatcher : RunnerService, IJobDispatcher
|
public sealed class JobDispatcher : RunnerService, IJobDispatcher
|
||||||
{
|
{
|
||||||
|
private static Regex _invalidJsonRegex = new Regex(@"invalid\ Json\ at\ position\ '(\d+)':", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||||
private readonly Lazy<Dictionary<long, TaskResult>> _localRunJobResult = new Lazy<Dictionary<long, TaskResult>>();
|
private readonly Lazy<Dictionary<long, TaskResult>> _localRunJobResult = new Lazy<Dictionary<long, TaskResult>>();
|
||||||
private int _poolId;
|
private int _poolId;
|
||||||
|
|
||||||
@@ -964,6 +967,30 @@ namespace GitHub.Runner.Listener
|
|||||||
TimelineRecord jobRecord = timeline.Records.FirstOrDefault(x => x.Id == message.JobId && x.RecordType == "Job");
|
TimelineRecord jobRecord = timeline.Records.FirstOrDefault(x => x.Id == message.JobId && x.RecordType == "Job");
|
||||||
ArgUtil.NotNull(jobRecord, nameof(jobRecord));
|
ArgUtil.NotNull(jobRecord, nameof(jobRecord));
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if (!string.IsNullOrEmpty(errorMessage) &&
|
||||||
|
message.Variables.TryGetValue("DistributedTask.EnableRunnerIPCDebug", out var enableRunnerIPCDebug) &&
|
||||||
|
StringUtil.ConvertToBoolean(enableRunnerIPCDebug.Value))
|
||||||
|
{
|
||||||
|
// the trace should be best effort and not affect any job result
|
||||||
|
var match = _invalidJsonRegex.Match(errorMessage);
|
||||||
|
if (match.Success &&
|
||||||
|
match.Groups.Count == 2)
|
||||||
|
{
|
||||||
|
var jsonPosition = int.Parse(match.Groups[1].Value);
|
||||||
|
var serializedJobMessage = JsonUtility.ToString(message);
|
||||||
|
var originalJson = serializedJobMessage.Substring(jsonPosition - 10, 20);
|
||||||
|
errorMessage = $"Runner sent Json at position '{jsonPosition}': {originalJson} ({Convert.ToBase64String(Encoding.UTF8.GetBytes(originalJson))})\n{errorMessage}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Trace.Error(ex);
|
||||||
|
errorMessage = $"Fail to check json IPC error: {ex.Message}\n{errorMessage}";
|
||||||
|
}
|
||||||
|
|
||||||
var unhandledExceptionIssue = new Issue() { Type = IssueType.Error, Message = errorMessage };
|
var unhandledExceptionIssue = new Issue() { Type = IssueType.Error, Message = errorMessage };
|
||||||
unhandledExceptionIssue.Data[Constants.Runner.InternalTelemetryIssueDataKey] = Constants.Runner.WorkerCrash;
|
unhandledExceptionIssue.Data[Constants.Runner.InternalTelemetryIssueDataKey] = Constants.Runner.WorkerCrash;
|
||||||
jobRecord.ErrorCount++;
|
jobRecord.ErrorCount++;
|
||||||
|
|||||||
@@ -425,6 +425,7 @@ namespace GitHub.Runner.Listener
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
Trace.Info($"Received job message of length {message.Body.Length} from service, with hash '{IOUtil.GetSha256Hash(message.Body)}'");
|
||||||
var jobMessage = StringUtil.ConvertFromJson<Pipelines.AgentJobRequestMessage>(message.Body);
|
var jobMessage = StringUtil.ConvertFromJson<Pipelines.AgentJobRequestMessage>(message.Body);
|
||||||
jobDispatcher.Run(jobMessage, runOnce);
|
jobDispatcher.Run(jobMessage, runOnce);
|
||||||
if (runOnce)
|
if (runOnce)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ using System.Threading.Tasks;
|
|||||||
using GitHub.Services.WebApi;
|
using GitHub.Services.WebApi;
|
||||||
using GitHub.Runner.Common;
|
using GitHub.Runner.Common;
|
||||||
using GitHub.Runner.Sdk;
|
using GitHub.Runner.Sdk;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
namespace GitHub.Runner.Worker
|
namespace GitHub.Runner.Worker
|
||||||
{
|
{
|
||||||
@@ -43,6 +44,7 @@ namespace GitHub.Runner.Worker
|
|||||||
ArgUtil.NotNullOrEmpty(pipeOut, nameof(pipeOut));
|
ArgUtil.NotNullOrEmpty(pipeOut, nameof(pipeOut));
|
||||||
VssUtil.InitializeVssClientSettings(HostContext.UserAgents, HostContext.WebProxy);
|
VssUtil.InitializeVssClientSettings(HostContext.UserAgents, HostContext.WebProxy);
|
||||||
var jobRunner = HostContext.CreateService<IJobRunner>();
|
var jobRunner = HostContext.CreateService<IJobRunner>();
|
||||||
|
var terminal = HostContext.GetService<ITerminal>();
|
||||||
|
|
||||||
using (var channel = HostContext.CreateService<IProcessChannel>())
|
using (var channel = HostContext.CreateService<IProcessChannel>())
|
||||||
using (var jobRequestCancellationToken = CancellationTokenSource.CreateLinkedTokenSource(HostContext.RunnerShutdownToken))
|
using (var jobRequestCancellationToken = CancellationTokenSource.CreateLinkedTokenSource(HostContext.RunnerShutdownToken))
|
||||||
@@ -64,7 +66,22 @@ namespace GitHub.Runner.Worker
|
|||||||
Trace.Info("Message received.");
|
Trace.Info("Message received.");
|
||||||
ArgUtil.Equal(MessageType.NewJobRequest, channelMessage.MessageType, nameof(channelMessage.MessageType));
|
ArgUtil.Equal(MessageType.NewJobRequest, channelMessage.MessageType, nameof(channelMessage.MessageType));
|
||||||
ArgUtil.NotNullOrEmpty(channelMessage.Body, nameof(channelMessage.Body));
|
ArgUtil.NotNullOrEmpty(channelMessage.Body, nameof(channelMessage.Body));
|
||||||
var jobMessage = StringUtil.ConvertFromJson<Pipelines.AgentJobRequestMessage>(channelMessage.Body);
|
Pipelines.AgentJobRequestMessage jobMessage = null;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
jobMessage = StringUtil.ConvertFromJson<Pipelines.AgentJobRequestMessage>(channelMessage.Body);
|
||||||
|
}
|
||||||
|
catch (JsonReaderException ex)
|
||||||
|
{
|
||||||
|
if (channelMessage.Body.Length > ex.LinePosition + 10)
|
||||||
|
{
|
||||||
|
var errorChunk = channelMessage.Body.Substring(ex.LinePosition - 10, 20);
|
||||||
|
terminal.WriteError($"Worker received invalid Json at position '{ex.LinePosition}': {errorChunk} ({Convert.ToBase64String(Encoding.UTF8.GetBytes(errorChunk))})");
|
||||||
|
}
|
||||||
|
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
ArgUtil.NotNull(jobMessage, nameof(jobMessage));
|
ArgUtil.NotNull(jobMessage, nameof(jobMessage));
|
||||||
HostContext.WritePerfCounter($"WorkerJobMessageReceived_{jobMessage.RequestId.ToString()}");
|
HostContext.WritePerfCounter($"WorkerJobMessageReceived_{jobMessage.RequestId.ToString()}");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user