Compare commits

...

2 Commits

Author SHA1 Message Date
Tingluo Huang
8fed26f692 . 2025-10-13 22:26:22 -04:00
Tingluo Huang
9421d45c05 Report job has infra failure to run-service 2025-10-13 15:55:23 -04:00
11 changed files with 62 additions and 22 deletions

View File

@@ -30,6 +30,7 @@ namespace GitHub.Runner.Common
string environmentUrl,
IList<Telemetry> telemetry,
string billingOwnerId,
string infrastructureFailureCategory,
CancellationToken token);
Task<RenewJobResponse> RenewJobAsync(Guid planId, Guid jobId, CancellationToken token);
@@ -80,11 +81,12 @@ namespace GitHub.Runner.Common
string environmentUrl,
IList<Telemetry> telemetry,
string billingOwnerId,
string infrastructureFailureCategory,
CancellationToken cancellationToken)
{
CheckConnection();
return RetryRequest(
async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, cancellationToken), cancellationToken,
async () => await _runServiceHttpClient.CompleteJobAsync(requestUri, planId, jobId, result, outputs, stepResults, jobAnnotations, environmentUrl, telemetry, billingOwnerId, infrastructureFailureCategory, cancellationToken), cancellationToken,
shouldRetry: ex =>
ex is not VssUnauthorizedException && // HTTP status 401
ex is not TaskOrchestrationJobNotFoundException); // HTTP status 404

View File

@@ -1211,7 +1211,7 @@ namespace GitHub.Runner.Listener
jobAnnotations.Add(annotation.Value);
}
await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, TaskResult.Failed, outputs: null, stepResults: null, jobAnnotations: jobAnnotations, environmentUrl: null, telemetry: null, billingOwnerId: message.BillingOwnerId, CancellationToken.None);
await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, TaskResult.Failed, outputs: null, stepResults: null, jobAnnotations: jobAnnotations, environmentUrl: null, telemetry: null, billingOwnerId: message.BillingOwnerId, infrastructureFailureCategory: null, CancellationToken.None);
}
catch (Exception ex)
{

View File

@@ -5,8 +5,8 @@ using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Security.Claims;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
@@ -406,12 +406,12 @@ namespace GitHub.Runner.Listener
try
{
Trace.Info(nameof(RunAsync));
// First try using migrated settings if available
var configManager = HostContext.GetService<IConfigurationManager>();
RunnerSettings migratedSettings = null;
try
try
{
migratedSettings = configManager.LoadMigratedSettings();
Trace.Info("Loaded migrated settings from .runner_migrated file");
@@ -422,15 +422,15 @@ namespace GitHub.Runner.Listener
// If migrated settings file doesn't exist or can't be loaded, we'll use the provided settings
Trace.Info($"Failed to load migrated settings: {ex.Message}");
}
bool usedMigratedSettings = false;
if (migratedSettings != null)
{
// Try to create session with migrated settings first
Trace.Info("Attempting to create session using migrated settings");
_listener = GetMessageListener(migratedSettings, isMigratedSettings: true);
try
{
CreateSessionResult createSessionResult = await _listener.CreateSessionAsync(HostContext.RunnerShutdownToken);
@@ -450,7 +450,7 @@ namespace GitHub.Runner.Listener
Trace.Error($"Exception when creating session with migrated settings: {ex}");
}
}
// If migrated settings weren't used or session creation failed, use original settings
if (!usedMigratedSettings)
{
@@ -503,7 +503,7 @@ namespace GitHub.Runner.Listener
restartSession = true;
break;
}
TaskAgentMessage message = null;
bool skipMessageDeletion = false;
try
@@ -653,6 +653,32 @@ namespace GitHub.Runner.Listener
}
else
{
// Loads credentials with the V2 auth URL allowed and, when the federated credential is a
// VssOAuthCredential, requests a token from its token provider.
var credMgrTmp = HostContext.GetService<ICredentialManager>();
var authV2Cred = credMgrTmp.LoadCredentials(allowAuthUrlV2: true);
if (authV2Cred.Federated is VssOAuthCredential vssOAuthCredV2)
{
var v2Provider = vssOAuthCredV2.GetTokenProvider(vssOAuthCredV2.AuthorizationUrl);
var v2Token = await v2Provider.GetTokenAsync(null, CancellationToken.None);
if (v2Token is VssOAuthAccessToken)
{
// Security: never write the token value itself to the trace log; an access token is a
// bearer secret. Record only that a token was obtained.
Trace.Info("V2 access token acquired");
}
}
// NOTE(review): the message below is built from four fixed string literals; the line that
// deserializes it from message.Body is commented out, so the incoming message body is not
// used to populate it. Looks like debug scaffolding — confirm before merging.
var runnerRefreshConfigMessage = new RunnerRefreshConfigMessage("E_kgDNDTw/O_kgDOBAN4Bg/self-hosted/65", "credentials", "pipelines", "refresh_url");
// var runnerRefreshConfigMessage = JsonUtility.FromString<RunnerRefreshConfigMessage>(message.Body);
Trace.Info($"Received RunnerRefreshConfigMessage for '{runnerRefreshConfigMessage.ConfigType}' config file");
var configUpdater = HostContext.GetService<IRunnerConfigUpdater>();
// Passes the four fields of the message constructed above to the config updater.
await configUpdater.UpdateRunnerConfigAsync(
runnerQualifiedId: runnerRefreshConfigMessage.RunnerQualifiedId,
configType: runnerRefreshConfigMessage.ConfigType,
serviceType: runnerRefreshConfigMessage.ServiceType,
configRefreshUrl: runnerRefreshConfigMessage.ConfigRefreshUrl);
Trace.Info("Runner configuration was updated. Continue to process job request message.");
// NOTE(review): per the .NET Task.Delay documentation a delay of -1 waits indefinitely, so this
// await can only end through cancellation of messageQueueLoopTokenSource.Token. The statements
// that follow presumably never run in the normal flow, despite the log line above — confirm
// this wait is intentional.
await Task.Delay(-1, cancellationToken: messageQueueLoopTokenSource.Token);
var messageRef = StringUtil.ConvertFromJson<RunnerJobRequestRef>(message.Body);
// Acknowledge (best-effort)
@@ -755,7 +781,8 @@ namespace GitHub.Runner.Listener
}
else if (string.Equals(message.MessageType, RunnerRefreshConfigMessage.MessageType))
{
// NOTE(review): this hunk shows two declarations of runnerRefreshConfigMessage (the compare view
// lists the pre-change and post-change lines together). The first deserializes message.Body; the
// second builds the message from fixed string literals and leaves the deserialization commented
// out, so with the second form the received message body is not read here. Presumably debug
// scaffolding — confirm before merging.
var runnerRefreshConfigMessage = JsonUtility.FromString<RunnerRefreshConfigMessage>(message.Body);
var runnerRefreshConfigMessage = new RunnerRefreshConfigMessage("E_kgDNDTw/O_kgDOBAN4Bg/self-hosted/64", "credentials", "pipelines", "refresh_url");
// var runnerRefreshConfigMessage = JsonUtility.FromString<RunnerRefreshConfigMessage>(message.Body);
Trace.Info($"Received RunnerRefreshConfigMessage for '{runnerRefreshConfigMessage.ConfigType}' config file");
var configUpdater = HostContext.GetService<IRunnerConfigUpdater>();
await configUpdater.UpdateRunnerConfigAsync(
@@ -859,7 +886,7 @@ namespace GitHub.Runner.Listener
{
restart = false;
returnCode = await RunAsync(settings, runOnce);
if (returnCode == Constants.Runner.ReturnCode.RunnerConfigurationRefreshed)
{
Trace.Info("Runner configuration was refreshed, restarting session...");

View File

@@ -229,7 +229,7 @@ namespace GitHub.Runner.Listener
Trace.Entering();
Trace.Info($"Verifying runner qualified id: {runnerQualifiedId}");
var idParts = runnerQualifiedId.Split("/", StringSplitOptions.RemoveEmptyEntries);
// NOTE(review): two forms of the same guard appear here (the compare view lists the pre-change
// and post-change lines together). The first rejects the id when it does not split into four
// parts OR when the fourth part differs from _settings.AgentId; the second only checks the part
// count. With the second form an id whose fourth part names a different agent passes this guard,
// while the error and telemetry messages in the body still describe a mismatch with the current
// runner — confirm the agent id comparison was meant to be dropped.
if (idParts.Length != 4 || idParts[3] != _settings.AgentId.ToString())
if (idParts.Length != 4)
{
Trace.Error($"Runner qualified id '{runnerQualifiedId}' does not match the current runner '{_settings.AgentId}'.");
await ReportTelemetryAsync($"Runner qualified id '{runnerQualifiedId}' does not match the current runner '{_settings.AgentId}'.");

View File

@@ -111,7 +111,7 @@ namespace GitHub.Runner.Worker
{
// Log the error and fail the PrepareActionsAsync Initialization.
Trace.Error($"Caught exception from PrepareActionsAsync Initialization: {ex}");
executionContext.InfrastructureError(ex.Message);
executionContext.InfrastructureError(ex.Message, category: "resolve_action");
executionContext.Result = TaskResult.Failed;
throw;
}
@@ -119,7 +119,7 @@ namespace GitHub.Runner.Worker
{
// Log the error and fail the PrepareActionsAsync Initialization.
Trace.Error($"Caught exception from PrepareActionsAsync Initialization: {ex}");
executionContext.InfrastructureError(ex.Message);
executionContext.InfrastructureError(ex.Message, category: "invalid_action_download");
executionContext.Result = TaskResult.Failed;
throw;
}
@@ -777,15 +777,15 @@ namespace GitHub.Runner.Worker
IOUtil.DeleteDirectory(destDirectory, executionContext.CancellationToken);
Directory.CreateDirectory(destDirectory);
if (downloadInfo.PackageDetails != null)
if (downloadInfo.PackageDetails != null)
{
executionContext.Output($"##[group]Download immutable action package '{downloadInfo.NameWithOwner}@{downloadInfo.Ref}'");
executionContext.Output($"Version: {downloadInfo.PackageDetails.Version}");
executionContext.Output($"Digest: {downloadInfo.PackageDetails.ManifestDigest}");
executionContext.Output($"Source commit SHA: {downloadInfo.ResolvedSha}");
executionContext.Output("##[endgroup]");
}
else
}
else
{
executionContext.Output($"Download action repository '{downloadInfo.NameWithOwner}@{downloadInfo.Ref}' (SHA:{downloadInfo.ResolvedSha})");
}

View File

@@ -522,6 +522,10 @@ namespace GitHub.Runner.Worker
if (annotation != null)
{
stepResult.Annotations.Add(annotation.Value);
if (annotation.Value.IsInfrastructureIssue && string.IsNullOrEmpty(Global.InfrastructureFailureCategory))
{
Global.InfrastructureFailureCategory = issue.Category;
}
}
});
@@ -1335,9 +1339,9 @@ namespace GitHub.Runner.Worker
}
// Do not add a format string overload. See comment on ExecutionContext.Write().
public static void InfrastructureError(this IExecutionContext context, string message)
public static void InfrastructureError(this IExecutionContext context, string message, string category = null)
{
var issue = new Issue() { Type = IssueType.Error, Message = message, IsInfrastructureIssue = true };
var issue = new Issue() { Type = IssueType.Error, Message = message, IsInfrastructureIssue = true, Category = category };
context.AddIssue(issue, ExecutionContextLogOptions.Default);
}

View File

@@ -27,6 +27,7 @@ namespace GitHub.Runner.Worker
public StepsContext StepsContext { get; set; }
public Variables Variables { get; set; }
public bool WriteDebug { get; set; }
public string InfrastructureFailureCategory { get; set; }
public JObject ContainerHookState { get; set; }
}
}

View File

@@ -321,7 +321,7 @@ namespace GitHub.Runner.Worker
{
try
{
await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, default);
await runServer.CompleteJobAsync(message.Plan.PlanId, message.JobId, result, jobContext.JobOutputs, jobContext.Global.StepsResult, jobContext.Global.JobAnnotations, environmentUrl, telemetry, billingOwnerId: message.BillingOwnerId, infrastructureFailureCategory: jobContext.Global.InfrastructureFailureCategory, default);
return result;
}
catch (VssUnauthorizedException ex)

View File

@@ -35,5 +35,8 @@ namespace GitHub.Actions.RunService.WebApi
[DataMember(Name = "billingOwnerId", EmitDefaultValue = false)]
public string BillingOwnerId { get; set; }
[DataMember(Name = "infrastructureFailureCategory", EmitDefaultValue = false)]
public string InfrastructureFailureCategory { get; set; }
}
}

View File

@@ -42,6 +42,7 @@ namespace Sdk.RSWebApi.Contracts
StartColumn = columnNumber,
EndColumn = endColumnNumber,
StepNumber = stepNumber,
IsInfrastructureIssue = issue.IsInfrastructureIssue ?? false
};
}

View File

@@ -131,6 +131,7 @@ namespace GitHub.Actions.RunService.WebApi
string environmentUrl,
IList<Telemetry> telemetry,
string billingOwnerId,
string infrastructureFailureCategory,
CancellationToken cancellationToken = default)
{
HttpMethod httpMethod = new HttpMethod("POST");
@@ -145,6 +146,7 @@ namespace GitHub.Actions.RunService.WebApi
EnvironmentUrl = environmentUrl,
Telemetry = telemetry,
BillingOwnerId = billingOwnerId,
InfrastructureFailureCategory = infrastructureFailureCategory
};
requestUri = new Uri(requestUri, "completejob");