mirror of
https://github.com/actions/runner.git
synced 2025-12-10 12:36:23 +00:00
Compare commits
12 Commits
copilot/fi
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b9772a0fb4 | ||
|
|
f8ae5bb1a7 | ||
|
|
a5631456a2 | ||
|
|
65dfa460ba | ||
|
|
80ee51f164 | ||
|
|
c95883f28e | ||
|
|
6e940643a9 | ||
|
|
629f2384a4 | ||
|
|
c3bf70becb | ||
|
|
8b65f5f9df | ||
|
|
5f1efec208 | ||
|
|
20d82ad357 |
@@ -4,7 +4,7 @@
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/docker-in-docker:1": {},
|
||||
"ghcr.io/devcontainers/features/dotnet": {
|
||||
"version": "8.0.412"
|
||||
"version": "8.0.413"
|
||||
},
|
||||
"ghcr.io/devcontainers/features/node:1": {
|
||||
"version": "20"
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -338,7 +338,7 @@ jobs:
|
||||
org.opencontainers.image.licenses=MIT
|
||||
|
||||
- name: Generate attestation
|
||||
uses: actions/attest-build-provenance@v2
|
||||
uses: actions/attest-build-provenance@v3
|
||||
with:
|
||||
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
subject-digest: ${{ steps.build-and-push.outputs.digest }}
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -27,4 +27,4 @@ TestResults
|
||||
TestLogs
|
||||
.DS_Store
|
||||
.mono
|
||||
**/*.DotSettings.user/tmp/
|
||||
**/*.DotSettings.user
|
||||
@@ -1,6 +1 @@
|
||||
#!/usr/bin/env sh
|
||||
. "$(dirname -- "$0")/_/husky.sh"
|
||||
|
||||
cd src/Misc/expressionFunc/hashFiles
|
||||
|
||||
npx lint-staged
|
||||
cd src/Misc/expressionFunc/hashFiles && npx lint-staged
|
||||
|
||||
@@ -5,8 +5,8 @@ ARG TARGETOS
|
||||
ARG TARGETARCH
|
||||
ARG RUNNER_VERSION
|
||||
ARG RUNNER_CONTAINER_HOOKS_VERSION=0.7.0
|
||||
ARG DOCKER_VERSION=28.3.2
|
||||
ARG BUILDX_VERSION=0.26.1
|
||||
ARG DOCKER_VERSION=28.3.3
|
||||
ARG BUILDX_VERSION=0.27.0
|
||||
|
||||
RUN apt update -y && apt install curl unzip -y
|
||||
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
# KillMode Change Implementation Summary
|
||||
|
||||
## Problem Addressed
|
||||
|
||||
The question "is this a good idea?" regarding "killmode changing?" has been thoroughly analyzed and addressed through a minimal but impactful change to the GitHub Actions Runner systemd service configuration.
|
||||
|
||||
## Solution Implemented
|
||||
|
||||
**Changed**: `KillMode=process` → `KillMode=mixed` in `src/Misc/layoutbin/actions.runner.service.template`
|
||||
|
||||
## Why This Change Makes Sense
|
||||
|
||||
### Evidence from Codebase Analysis
|
||||
1. **Orphan Process Concerns**: The codebase contains extensive orphan process cleanup mechanisms in:
|
||||
- `JobExtension.cs`: Tracks and cleans up orphan processes using `RUNNER_TRACKING_ID`
|
||||
- `JobDispatcher.cs`: Prevents orphan worker processes
|
||||
- `ProcessInvoker.cs`: Implements process tree termination
|
||||
|
||||
2. **Current Signal Flow**:
|
||||
- systemd → runsvc.sh (SIGTERM) → Node.js process (SIGINT)
|
||||
- Relies on runsvc.sh successfully forwarding signals
|
||||
|
||||
### Benefits of KillMode=mixed
|
||||
|
||||
1. **Maintains Graceful Shutdown**: Main process (runsvc.sh) still receives SIGTERM first
|
||||
2. **Adds Safety Net**: systemd ensures cleanup if signal forwarding fails
|
||||
3. **Better Process Tree Cleanup**: More robust handling of complex job hierarchies
|
||||
4. **Reduced Orphan Risk**: Addresses concerns evident throughout the codebase
|
||||
5. **Container Compatibility**: Better termination of containerized workloads
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### Files Changed
|
||||
- `src/Misc/layoutbin/actions.runner.service.template`: Single line change
|
||||
- Added comprehensive test coverage in `src/Test/L0/Misc/SystemdServiceTemplateL0.cs`
|
||||
- Created analysis documentation and testing tools
|
||||
|
||||
### Testing
|
||||
- ✅ Build succeeds with no errors
|
||||
- ✅ New tests validate the change
|
||||
- ✅ Existing functionality unchanged
|
||||
- ✅ Layout generation includes the change
|
||||
|
||||
## Impact Assessment
|
||||
|
||||
### Risk Level: **LOW**
|
||||
- Only affects service shutdown behavior
|
||||
- No changes to startup or normal operation
|
||||
- Backward compatible with existing signal handling
|
||||
- Testable with standard systemd tools
|
||||
|
||||
### Compatibility
|
||||
- Maintains existing runsvc.sh signal forwarding behavior
|
||||
- Compatible with all existing process handling code
|
||||
- No breaking changes to APIs or interfaces
|
||||
|
||||
## Testing Tools Provided
|
||||
|
||||
Created `/tmp/killmode-test.sh` script that allows administrators to:
|
||||
- Test different KillMode configurations
|
||||
- Compare process cleanup behavior
|
||||
- Validate signal handling works correctly
|
||||
|
||||
## Conclusion
|
||||
|
||||
This change represents a **good idea** because it:
|
||||
1. Addresses real orphan process concerns evident in the codebase
|
||||
2. Provides better reliability with minimal risk
|
||||
3. Maintains existing graceful shutdown behavior
|
||||
4. Adds systemd's robust process cleanup as a safety net
|
||||
5. Requires only a single line change with comprehensive testing
|
||||
|
||||
The implementation follows the principle of making the smallest possible change while addressing the underlying concern about process cleanup reliability.
|
||||
@@ -1,120 +0,0 @@
|
||||
# GitHub Actions Runner KillMode Analysis
|
||||
|
||||
## Problem Statement
|
||||
The question "is this a good idea?" regarding "killmode changing?" asks us to evaluate whether the current systemd `KillMode=process` setting should be changed to a different option.
|
||||
|
||||
## Current Implementation
|
||||
|
||||
### Systemd Service Configuration
|
||||
- **KillMode**: `process` (only main process gets signal)
|
||||
- **KillSignal**: `SIGTERM`
|
||||
- **TimeoutStopSec**: `5min`
|
||||
|
||||
### Signal Handling Flow
|
||||
1. systemd sends SIGTERM to `runsvc.sh` (main process)
|
||||
2. `runsvc.sh` has trap: `trap 'kill -INT $PID' TERM INT`
|
||||
3. Converts SIGTERM → SIGINT and sends to Node.js runner process
|
||||
4. Node.js process handles graceful shutdown
|
||||
|
||||
## Analysis of Current Approach
|
||||
|
||||
### Strengths
|
||||
1. **Graceful Shutdown Control**: Manual signal conversion allows proper Node.js shutdown handling
|
||||
2. **Predictable Behavior**: Only main process receives systemd signals
|
||||
3. **Custom Logic**: Allows for runner-specific shutdown procedures
|
||||
4. **Signal Compatibility**: SIGINT is more commonly handled by Node.js applications
|
||||
|
||||
### Potential Issues
|
||||
1. **Single Point of Failure**: If `runsvc.sh` fails to forward signals, child processes are orphaned
|
||||
2. **Complex Chain**: More components in signal propagation path
|
||||
3. **Process Tree Cleanup**: May not handle deep process hierarchies as robustly
|
||||
|
||||
## Orphan Process Context
|
||||
|
||||
The codebase reveals significant effort to handle orphan processes:
|
||||
|
||||
### Evidence from Code Analysis
|
||||
1. **JobExtension.cs**: Dedicated orphan process cleanup mechanism
|
||||
- Tracks processes before/after job execution
|
||||
- Uses `RUNNER_TRACKING_ID` environment variable
|
||||
- Terminates orphan processes at job completion
|
||||
|
||||
2. **JobDispatcher.cs**: Worker process orphan prevention
|
||||
- Explicit waits to prevent orphan worker processes
|
||||
- Handles "zombie worker" scenarios
|
||||
|
||||
3. **ProcessInvoker.cs**: Process tree termination
|
||||
- Implements both Windows and Unix process tree killing
|
||||
- Signal escalation: SIGINT → SIGTERM → SIGKILL
|
||||
|
||||
## Alternative KillMode Options
|
||||
|
||||
### KillMode=control-group
|
||||
**Behavior**: All processes in service's cgroup get SIGTERM, then SIGKILL after timeout
|
||||
|
||||
**Pros**:
|
||||
- Robust cleanup of entire process tree
|
||||
- Built-in systemd guarantees
|
||||
- Simpler signal flow
|
||||
- No dependency on runsvc.sh signal forwarding
|
||||
|
||||
**Cons**:
|
||||
- Less control over shutdown sequence
|
||||
- All processes get SIGTERM simultaneously
|
||||
- May interrupt graceful shutdown of worker processes
|
||||
|
||||
### KillMode=mixed
|
||||
**Behavior**: Main process gets SIGTERM, remaining processes get SIGKILL after timeout
|
||||
|
||||
**Pros**:
|
||||
- Combines benefits of both approaches
|
||||
- Main process can handle graceful shutdown
|
||||
- Systemd ensures process tree cleanup
|
||||
- Fallback protection against orphan processes
|
||||
|
||||
**Cons**:
|
||||
- More complex behavior
|
||||
- Still depends on main process signal handling
|
||||
|
||||
## Security and Reliability Considerations
|
||||
|
||||
### Current Risks
|
||||
1. If `runsvc.sh` crashes before forwarding signals, Node.js process continues running
|
||||
2. Deep process trees from job execution may not be properly cleaned up
|
||||
3. Container processes might not receive proper termination signals
|
||||
|
||||
### Reliability Improvements with control-group/mixed
|
||||
1. systemd guarantees process cleanup regardless of main process behavior
|
||||
2. Reduces risk of orphan processes surviving service shutdown
|
||||
3. More predictable behavior for administrators
|
||||
|
||||
## Recommendation
|
||||
|
||||
### Recommended Change: KillMode=mixed
|
||||
|
||||
**Rationale**:
|
||||
1. **Maintains Graceful Shutdown**: Main process (runsvc.sh) still receives SIGTERM first
|
||||
2. **Adds Safety Net**: systemd ensures cleanup if main process fails to handle signals
|
||||
3. **Reduces Orphan Risk**: Addresses the orphan process concerns evident in the codebase
|
||||
4. **Better Process Tree Handling**: More robust for complex job process hierarchies
|
||||
5. **Container Compatibility**: Better handling of containerized workloads
|
||||
|
||||
### Implementation Impact
|
||||
- **Low Risk**: Change only affects service shutdown behavior
|
||||
- **Backward Compatible**: No changes to startup or normal operation
|
||||
- **Testable**: Can be validated with process monitoring during service stops
|
||||
|
||||
### Alternative Considerations
|
||||
- **KillMode=control-group** could be considered if graceful shutdown proves problematic
|
||||
- Current **KillMode=process** could remain if the signal forwarding is deemed reliable enough
|
||||
|
||||
## Testing Recommendations
|
||||
|
||||
1. Test service shutdown with various job types running
|
||||
2. Verify process cleanup with nested process trees
|
||||
3. Test container job termination scenarios
|
||||
4. Monitor for any regressions in graceful shutdown behavior
|
||||
|
||||
## Conclusion
|
||||
|
||||
Changing to `KillMode=mixed` would strike a good balance: it maintains the current graceful shutdown behavior while adding systemd's robust process cleanup guarantees. This addresses the orphan process concerns evident throughout the codebase while maintaining compatibility.
|
||||
5392
src/Misc/expressionFunc/hashFiles/package-lock.json
generated
5392
src/Misc/expressionFunc/hashFiles/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,7 +10,7 @@
|
||||
"lint": "eslint src/**/*.ts",
|
||||
"pack": "ncc build -o ../../layoutbin/hashFiles",
|
||||
"all": "npm run format && npm run lint && npm run build && npm run pack",
|
||||
"prepare": "cd ../../../../ && husky install"
|
||||
"prepare": "cd ../../../../ && husky"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -36,15 +36,15 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.6.2",
|
||||
"@typescript-eslint/eslint-plugin": "^6.7.2",
|
||||
"@typescript-eslint/eslint-plugin": "^6.21.0",
|
||||
"@typescript-eslint/parser": "^6.7.2",
|
||||
"@vercel/ncc": "^0.38.0",
|
||||
"@vercel/ncc": "^0.38.3",
|
||||
"eslint": "^8.47.0",
|
||||
"eslint-plugin-github": "^4.10.0",
|
||||
"eslint-plugin-github": "^6.0.0",
|
||||
"eslint-plugin-prettier": "^5.0.0",
|
||||
"husky": "^8.0.3",
|
||||
"husky": "^9.1.7",
|
||||
"lint-staged": "^15.5.0",
|
||||
"prettier": "^3.0.3",
|
||||
"typescript": "^5.2.2"
|
||||
"typescript": "^5.9.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
ExecStart={{RunnerRoot}}/runsvc.sh
|
||||
User={{User}}
|
||||
WorkingDirectory={{RunnerRoot}}
|
||||
KillMode=mixed
|
||||
KillMode=process
|
||||
KillSignal=SIGTERM
|
||||
TimeoutStopSec=5min
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
SECONDS=0
|
||||
while [[ $SECONDS != $1 ]]; do
|
||||
while [[ $SECONDS -lt $1 ]]; do
|
||||
:
|
||||
done
|
||||
|
||||
@@ -23,6 +23,8 @@ namespace GitHub.Runner.Common
|
||||
|
||||
Task<TaskAgentMessage> GetRunnerMessageAsync(Guid? sessionId, TaskAgentStatus status, string version, string os, string architecture, bool disableUpdate, CancellationToken token);
|
||||
|
||||
Task AcknowledgeRunnerRequestAsync(string runnerRequestId, Guid? sessionId, TaskAgentStatus status, string version, string os, string architecture, CancellationToken token);
|
||||
|
||||
Task UpdateConnectionIfNeeded(Uri serverUri, VssCredentials credentials);
|
||||
|
||||
Task ForceRefreshConnection(VssCredentials credentials);
|
||||
@@ -67,10 +69,17 @@ namespace GitHub.Runner.Common
|
||||
var brokerSession = RetryRequest<TaskAgentMessage>(
|
||||
async () => await _brokerHttpClient.GetRunnerMessageAsync(sessionId, version, status, os, architecture, disableUpdate, cancellationToken), cancellationToken, shouldRetry: ShouldRetryException);
|
||||
|
||||
|
||||
return brokerSession;
|
||||
}
|
||||
|
||||
/// <summary>
/// Acknowledges a runner request through the broker HTTP client.
/// Verifies the connection first, then forwards the call exactly once.
/// </summary>
/// <param name="runnerRequestId">Identifier of the runner request being acknowledged.</param>
/// <param name="sessionId">Optional session id forwarded to the broker client.</param>
/// <param name="status">Runner status forwarded to the broker client.</param>
/// <param name="version">Runner version forwarded to the broker client.</param>
/// <param name="os">Operating system value forwarded to the broker client.</param>
/// <param name="architecture">Architecture value forwarded to the broker client.</param>
/// <param name="cancellationToken">Token forwarded to the underlying HTTP call.</param>
public async Task AcknowledgeRunnerRequestAsync(string runnerRequestId, Guid? sessionId, TaskAgentStatus status, string version, string os, string architecture, CancellationToken cancellationToken)
|
||||
{
|
||||
CheckConnection();
|
||||
|
||||
// No retries: the call is issued once and any failure propagates to the caller.
// Note the argument order below: the client call takes version before status,
// which differs from this method's own parameter order.
|
||||
await _brokerHttpClient.AcknowledgeRunnerRequestAsync(runnerRequestId, sessionId, version, status, os, architecture, cancellationToken);
|
||||
}
|
||||
|
||||
public async Task DeleteSessionAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
CheckConnection();
|
||||
|
||||
@@ -70,7 +70,7 @@ namespace GitHub.Runner.Common
|
||||
|
||||
protected async Task RetryRequest(Func<Task> func,
|
||||
CancellationToken cancellationToken,
|
||||
int maxRetryAttemptsCount = 5,
|
||||
int maxAttempts = 5,
|
||||
Func<Exception, bool> shouldRetry = null
|
||||
)
|
||||
{
|
||||
@@ -79,31 +79,31 @@ namespace GitHub.Runner.Common
|
||||
await func();
|
||||
return Unit.Value;
|
||||
}
|
||||
await RetryRequest<Unit>(wrappedFunc, cancellationToken, maxRetryAttemptsCount, shouldRetry);
|
||||
await RetryRequest<Unit>(wrappedFunc, cancellationToken, maxAttempts, shouldRetry);
|
||||
}
|
||||
|
||||
protected async Task<T> RetryRequest<T>(Func<Task<T>> func,
|
||||
CancellationToken cancellationToken,
|
||||
int maxRetryAttemptsCount = 5,
|
||||
int maxAttempts = 5,
|
||||
Func<Exception, bool> shouldRetry = null
|
||||
)
|
||||
{
|
||||
var retryCount = 0;
|
||||
var attempt = 0;
|
||||
while (true)
|
||||
{
|
||||
retryCount++;
|
||||
attempt++;
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
try
|
||||
{
|
||||
return await func();
|
||||
}
|
||||
// TODO: Add handling of non-retriable exceptions: https://github.com/github/actions-broker/issues/122
|
||||
catch (Exception ex) when (retryCount < maxRetryAttemptsCount && (shouldRetry == null || shouldRetry(ex)))
|
||||
catch (Exception ex) when (attempt < maxAttempts && (shouldRetry == null || shouldRetry(ex)))
|
||||
{
|
||||
Trace.Error("Catch exception during request");
|
||||
Trace.Error(ex);
|
||||
var backOff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(15));
|
||||
Trace.Warning($"Back off {backOff.TotalSeconds} seconds before next retry. {maxRetryAttemptsCount - retryCount} attempt left.");
|
||||
Trace.Warning($"Back off {backOff.TotalSeconds} seconds before next retry. {maxAttempts - attempt} attempt left.");
|
||||
await Task.Delay(backOff, cancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace GitHub.Runner.Listener
|
||||
private RunnerSettings _settings;
|
||||
private ITerminal _term;
|
||||
private TimeSpan _getNextMessageRetryInterval;
|
||||
private TaskAgentStatus runnerStatus = TaskAgentStatus.Online;
|
||||
private TaskAgentStatus _runnerStatus = TaskAgentStatus.Online;
|
||||
private CancellationTokenSource _getMessagesTokenSource;
|
||||
private VssCredentials _creds;
|
||||
private VssCredentials _credsV2;
|
||||
@@ -258,7 +258,7 @@ namespace GitHub.Runner.Listener
|
||||
public void OnJobStatus(object sender, JobStatusEventArgs e)
|
||||
{
|
||||
Trace.Info("Received job status event. JobState: {0}", e.Status);
|
||||
runnerStatus = e.Status;
|
||||
_runnerStatus = e.Status;
|
||||
try
|
||||
{
|
||||
_getMessagesTokenSource?.Cancel();
|
||||
@@ -291,7 +291,7 @@ namespace GitHub.Runner.Listener
|
||||
}
|
||||
|
||||
message = await _brokerServer.GetRunnerMessageAsync(_session.SessionId,
|
||||
runnerStatus,
|
||||
_runnerStatus,
|
||||
BuildConstants.RunnerPackage.Version,
|
||||
VarUtil.OS,
|
||||
VarUtil.OSArchitecture,
|
||||
@@ -417,6 +417,21 @@ namespace GitHub.Runner.Listener
|
||||
await Task.CompletedTask;
|
||||
}
|
||||
|
||||
/// <summary>
/// Acknowledges the given runner request via the broker server, passing along the
/// current session id, runner status, runner version, OS and architecture.
/// The call is cancelled when either the caller's token fires or a 5-second timeout elapses.
/// Exceptions are not caught here; they surface to the caller.
/// </summary>
/// <param name="runnerRequestId">Identifier of the runner request to acknowledge.</param>
/// <param name="cancellationToken">Caller-supplied token, linked with the internal timeout.</param>
public async Task AcknowledgeMessageAsync(string runnerRequestId, CancellationToken cancellationToken)
|
||||
{
|
||||
using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); // Short timeout
|
||||
// Link the timeout with the caller's token so whichever fires first cancels the request.
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);
|
||||
Trace.Info($"Acknowledging runner request '{runnerRequestId}'.");
|
||||
await _brokerServer.AcknowledgeRunnerRequestAsync(
|
||||
runnerRequestId,
|
||||
_session.SessionId,
|
||||
_runnerStatus,
|
||||
BuildConstants.RunnerPackage.Version,
|
||||
VarUtil.OS,
|
||||
VarUtil.OSArchitecture,
|
||||
linkedCts.Token);
|
||||
}
|
||||
|
||||
private bool IsGetNextMessageExceptionRetriable(Exception ex)
|
||||
{
|
||||
if (ex is TaskAgentNotFoundException ||
|
||||
|
||||
@@ -32,6 +32,7 @@ namespace GitHub.Runner.Listener
|
||||
Task DeleteSessionAsync();
|
||||
Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token);
|
||||
Task DeleteMessageAsync(TaskAgentMessage message);
|
||||
Task AcknowledgeMessageAsync(string runnerRequestId, CancellationToken cancellationToken);
|
||||
|
||||
Task RefreshListenerTokenAsync();
|
||||
void OnJobStatus(object sender, JobStatusEventArgs e);
|
||||
@@ -52,7 +53,7 @@ namespace GitHub.Runner.Listener
|
||||
private readonly TimeSpan _sessionConflictRetryLimit = TimeSpan.FromMinutes(4);
|
||||
private readonly TimeSpan _clockSkewRetryLimit = TimeSpan.FromMinutes(30);
|
||||
private readonly Dictionary<string, int> _sessionCreationExceptionTracker = new();
|
||||
private TaskAgentStatus runnerStatus = TaskAgentStatus.Online;
|
||||
private TaskAgentStatus _runnerStatus = TaskAgentStatus.Online;
|
||||
private CancellationTokenSource _getMessagesTokenSource;
|
||||
private VssCredentials _creds;
|
||||
private VssCredentials _credsV2;
|
||||
@@ -217,7 +218,7 @@ namespace GitHub.Runner.Listener
|
||||
public void OnJobStatus(object sender, JobStatusEventArgs e)
|
||||
{
|
||||
Trace.Info("Received job status event. JobState: {0}", e.Status);
|
||||
runnerStatus = e.Status;
|
||||
_runnerStatus = e.Status;
|
||||
try
|
||||
{
|
||||
_getMessagesTokenSource?.Cancel();
|
||||
@@ -250,7 +251,7 @@ namespace GitHub.Runner.Listener
|
||||
message = await _runnerServer.GetAgentMessageAsync(_settings.PoolId,
|
||||
_session.SessionId,
|
||||
_lastMessageId,
|
||||
runnerStatus,
|
||||
_runnerStatus,
|
||||
BuildConstants.RunnerPackage.Version,
|
||||
VarUtil.OS,
|
||||
VarUtil.OSArchitecture,
|
||||
@@ -274,7 +275,7 @@ namespace GitHub.Runner.Listener
|
||||
}
|
||||
|
||||
message = await _brokerServer.GetRunnerMessageAsync(_session.SessionId,
|
||||
runnerStatus,
|
||||
_runnerStatus,
|
||||
BuildConstants.RunnerPackage.Version,
|
||||
VarUtil.OS,
|
||||
VarUtil.OSArchitecture,
|
||||
@@ -437,6 +438,21 @@ namespace GitHub.Runner.Listener
|
||||
await _brokerServer.ForceRefreshConnection(_credsV2);
|
||||
}
|
||||
|
||||
/// <summary>
/// Acknowledges the given runner request via the broker server, passing along the
/// current session id, runner status, runner version, OS and architecture.
/// The call is cancelled when either the caller's token fires or a 5-second timeout elapses.
/// Exceptions are not caught here; they surface to the caller.
/// </summary>
/// <param name="runnerRequestId">Identifier of the runner request to acknowledge.</param>
/// <param name="cancellationToken">Caller-supplied token, linked with the internal timeout.</param>
public async Task AcknowledgeMessageAsync(string runnerRequestId, CancellationToken cancellationToken)
|
||||
{
|
||||
using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); // Short timeout
|
||||
// Link the timeout with the caller's token so whichever fires first cancels the request.
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);
|
||||
Trace.Info($"Acknowledging runner request '{runnerRequestId}'.");
|
||||
await _brokerServer.AcknowledgeRunnerRequestAsync(
|
||||
runnerRequestId,
|
||||
_session.SessionId,
|
||||
_runnerStatus,
|
||||
BuildConstants.RunnerPackage.Version,
|
||||
VarUtil.OS,
|
||||
VarUtil.OSArchitecture,
|
||||
linkedCts.Token);
|
||||
}
|
||||
|
||||
private TaskAgentMessage DecryptMessage(TaskAgentMessage message)
|
||||
{
|
||||
if (_session.EncryptionKey == null ||
|
||||
|
||||
@@ -654,22 +654,42 @@ namespace GitHub.Runner.Listener
|
||||
else
|
||||
{
|
||||
var messageRef = StringUtil.ConvertFromJson<RunnerJobRequestRef>(message.Body);
|
||||
Pipelines.AgentJobRequestMessage jobRequestMessage = null;
|
||||
|
||||
// Create connection
|
||||
var credMgr = HostContext.GetService<ICredentialManager>();
|
||||
// Acknowledge (best-effort)
|
||||
if (messageRef.ShouldAcknowledge) // Temporary feature flag
|
||||
{
|
||||
try
|
||||
{
|
||||
await _listener.AcknowledgeMessageAsync(messageRef.RunnerRequestId, messageQueueLoopTokenSource.Token);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Trace.Error($"Best-effort acknowledge failed for request '{messageRef.RunnerRequestId}'");
|
||||
Trace.Error(ex);
|
||||
}
|
||||
}
|
||||
|
||||
Pipelines.AgentJobRequestMessage jobRequestMessage = null;
|
||||
if (string.IsNullOrEmpty(messageRef.RunServiceUrl))
|
||||
{
|
||||
// Connect
|
||||
var credMgr = HostContext.GetService<ICredentialManager>();
|
||||
var creds = credMgr.LoadCredentials(allowAuthUrlV2: false);
|
||||
var actionsRunServer = HostContext.CreateService<IActionsRunServer>();
|
||||
await actionsRunServer.ConnectAsync(new Uri(settings.ServerUrl), creds);
|
||||
|
||||
// Get job message
|
||||
jobRequestMessage = await actionsRunServer.GetJobMessageAsync(messageRef.RunnerRequestId, messageQueueLoopTokenSource.Token);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Connect
|
||||
var credMgr = HostContext.GetService<ICredentialManager>();
|
||||
var credsV2 = credMgr.LoadCredentials(allowAuthUrlV2: true);
|
||||
var runServer = HostContext.CreateService<IRunServer>();
|
||||
await runServer.ConnectAsync(new Uri(messageRef.RunServiceUrl), credsV2);
|
||||
|
||||
// Get job message
|
||||
try
|
||||
{
|
||||
jobRequestMessage = await runServer.GetJobMessageAsync(messageRef.RunnerRequestId, messageRef.BillingOwnerId, messageQueueLoopTokenSource.Token);
|
||||
@@ -698,7 +718,10 @@ namespace GitHub.Runner.Listener
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch
|
||||
jobDispatcher.Run(jobRequestMessage, runOnce);
|
||||
|
||||
// Run once?
|
||||
if (runOnce)
|
||||
{
|
||||
Trace.Info("One time used runner received job message.");
|
||||
|
||||
@@ -10,6 +10,9 @@ namespace GitHub.Runner.Listener
|
||||
|
||||
[DataMember(Name = "runner_request_id")]
|
||||
public string RunnerRequestId { get; set; }
|
||||
|
||||
[DataMember(Name = "should_acknowledge")]
|
||||
public bool ShouldAcknowledge { get; set; }
|
||||
|
||||
[DataMember(Name = "run_service_url")]
|
||||
public string RunServiceUrl { get; set; }
|
||||
|
||||
@@ -79,6 +79,7 @@ namespace GitHub.Actions.RunService.WebApi
|
||||
{
|
||||
queryParams.Add("status", status.Value.ToString());
|
||||
}
|
||||
|
||||
if (runnerVersion != null)
|
||||
{
|
||||
queryParams.Add("runnerVersion", runnerVersion);
|
||||
@@ -142,7 +143,6 @@ namespace GitHub.Actions.RunService.WebApi
|
||||
}
|
||||
|
||||
public async Task<TaskAgentSession> CreateSessionAsync(
|
||||
|
||||
TaskAgentSession session,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
@@ -191,6 +191,76 @@ namespace GitHub.Actions.RunService.WebApi
|
||||
throw new Exception($"Failed to delete broker session: {result.Error}");
|
||||
}
|
||||
|
||||
/// <summary>
/// POSTs an acknowledgement for a runner request to the "acknowledge" endpoint,
/// resolved relative to the client's base address.
/// </summary>
/// <param name="runnerRequestId">Identifier sent in the request body under "runnerRequestId".</param>
/// <param name="sessionId">Added as the "sessionId" query parameter when not null.</param>
/// <param name="runnerVersion">Added as the "runnerVersion" query parameter when not null.</param>
/// <param name="status">Added as the "status" query parameter when not null.</param>
/// <param name="os">Added as the "os" query parameter when not null.</param>
/// <param name="architecture">Added as the "architecture" query parameter when not null.</param>
/// <param name="cancellationToken">Token passed to the underlying send call.</param>
/// <exception cref="RunnerNotFoundException">
/// Thrown when the error body parses to a broker error of kind RunnerNotFound.
/// </exception>
/// <exception cref="Exception">
/// Thrown for any other unsuccessful result; the message includes the request URI,
/// status code and error text.
/// </exception>
public async Task AcknowledgeRunnerRequestAsync(
|
||||
string runnerRequestId,
|
||||
Guid? sessionId,
|
||||
string runnerVersion,
|
||||
TaskAgentStatus? status,
|
||||
string os = null,
|
||||
string architecture = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
// URL
|
||||
var requestUri = new Uri(Client.BaseAddress, "acknowledge");
|
||||
|
||||
// Query parameters (each one is only included when its value is non-null)
|
||||
List<KeyValuePair<string, string>> queryParams = new List<KeyValuePair<string, string>>();
|
||||
if (sessionId != null)
|
||||
{
|
||||
queryParams.Add("sessionId", sessionId.Value.ToString());
|
||||
}
|
||||
if (status != null)
|
||||
{
|
||||
queryParams.Add("status", status.Value.ToString());
|
||||
}
|
||||
if (runnerVersion != null)
|
||||
{
|
||||
queryParams.Add("runnerVersion", runnerVersion);
|
||||
}
|
||||
if (os != null)
|
||||
{
|
||||
queryParams.Add("os", os);
|
||||
}
|
||||
if (architecture != null)
|
||||
{
|
||||
queryParams.Add("architecture", architecture);
|
||||
}
|
||||
|
||||
// Body: a single-entry dictionary carrying the runner request id
|
||||
var payload = new Dictionary<string, string>
|
||||
{
|
||||
["runnerRequestId"] = runnerRequestId,
|
||||
};
|
||||
var requestContent = new ObjectContent<Dictionary<string, string>>(payload, new VssJsonMediaTypeFormatter(true));
|
||||
|
||||
// POST (readErrorBody is enabled so the error body can be inspected below)
|
||||
var result = await SendAsync<object>(
|
||||
new HttpMethod("POST"),
|
||||
requestUri: requestUri,
|
||||
queryParameters: queryParams,
|
||||
content: requestContent,
|
||||
readErrorBody: true,
|
||||
cancellationToken: cancellationToken);
|
||||
|
||||
if (result.IsSuccess)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Map a recognized broker error to its specific exception type; any other
// error kind falls through to the generic exception at the end.
if (TryParseErrorBody(result.ErrorBody, out BrokerError brokerError))
|
||||
{
|
||||
switch (brokerError.ErrorKind)
|
||||
{
|
||||
case BrokerErrorKind.RunnerNotFound:
|
||||
throw new RunnerNotFoundException(brokerError.Message);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Exception($"Failed to acknowledge runner request. Request to {requestUri} failed with status: {result.StatusCode}. Error message {result.Error}");
|
||||
}
|
||||
|
||||
private static bool TryParseErrorBody(string errorBody, out BrokerError error)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(errorBody))
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace GitHub.Runner.Common.Tests.Misc
|
||||
{
|
||||
public sealed class SystemdServiceTemplateL0
|
||||
{
|
||||
[Fact]
|
||||
[Trait("Level", "L0")]
|
||||
[Trait("Category", "Common")]
|
||||
public void ServiceTemplate_ContainsExpectedKillMode()
|
||||
{
|
||||
// Arrange
|
||||
var templatePath = Path.Combine(TestUtil.GetSrcPath(), "Misc", "layoutbin", "actions.runner.service.template");
|
||||
|
||||
// Act
|
||||
var templateContent = File.ReadAllText(templatePath);
|
||||
|
||||
// Assert
|
||||
Assert.Contains("KillMode=mixed", templateContent);
|
||||
Assert.Contains("KillSignal=SIGTERM", templateContent);
|
||||
Assert.Contains("TimeoutStopSec=5min", templateContent);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
[Trait("Level", "L0")]
|
||||
[Trait("Category", "Common")]
|
||||
public void ServiceTemplate_HasValidStructure()
|
||||
{
|
||||
// Arrange
|
||||
var templatePath = Path.Combine(TestUtil.GetSrcPath(), "Misc", "layoutbin", "actions.runner.service.template");
|
||||
|
||||
// Act
|
||||
var templateContent = File.ReadAllText(templatePath);
|
||||
var lines = templateContent.Split('\n', StringSplitOptions.RemoveEmptyEntries);
|
||||
|
||||
// Assert
|
||||
Assert.Contains("[Unit]", lines);
|
||||
Assert.Contains("[Service]", lines);
|
||||
Assert.Contains("[Install]", lines);
|
||||
Assert.Contains("Description={{Description}}", lines);
|
||||
Assert.Contains("ExecStart={{RunnerRoot}}/runsvc.sh", lines);
|
||||
Assert.Contains("User={{User}}", lines);
|
||||
Assert.Contains("WorkingDirectory={{RunnerRoot}}", lines);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ LAYOUT_DIR="$SCRIPT_DIR/../_layout"
|
||||
DOWNLOAD_DIR="$SCRIPT_DIR/../_downloads/netcore2x"
|
||||
PACKAGE_DIR="$SCRIPT_DIR/../_package"
|
||||
DOTNETSDK_ROOT="$SCRIPT_DIR/../_dotnetsdk"
|
||||
DOTNETSDK_VERSION="8.0.412"
|
||||
DOTNETSDK_VERSION="8.0.413"
|
||||
DOTNETSDK_INSTALLDIR="$DOTNETSDK_ROOT/$DOTNETSDK_VERSION"
|
||||
RUNNER_VERSION=$(cat runnerversion)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"sdk": {
|
||||
"version": "8.0.412"
|
||||
"version": "8.0.413"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user