mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-10 19:50:30 +00:00
Handle offline runners gracefully (#341)
* If a runner pod starts up with an invalid token, it goes into an infinite retry loop while appearing as RUNNING from the outside.
* Normally, this error situation is detected because no corresponding runner object exists in GitHub, and the pod gets removed after the registration timeout.
* If the GitHub runner object already existed before (e.g. because a finalizer was not properly run as part of a partial Kubernetes crash), the runner will always stay in running mode; even updating the registration token will not kill the problematic pod.
* Introduce a RunnerOffline error type that can be handled in the runner controller and the replicaset controller.
* As runners are offline when a pod is completed and marked for restart, only do the additional restart checks if no restart was already decided, making the code a bit cleaner and saving GitHub API calls after each job completion.
This commit is contained in:
@@ -310,6 +310,14 @@ func (e *RunnerNotFound) Error() string {
|
||||
return fmt.Sprintf("runner %q not found", e.runnerName)
|
||||
}
|
||||
|
||||
// RunnerOffline is the error value reported for a runner that was found by
// name but whose status is "offline" (see IsRunnerBusy).
type RunnerOffline struct {
	// runnerName is the name of the offline runner; it is embedded, quoted,
	// in the error message.
	runnerName string
}

// Error implements the error interface. The message has the form
// `runner "<name>" offline`, with the name rendered via the %q verb.
func (e *RunnerOffline) Error() string {
	const format = "runner %q offline"
	return fmt.Sprintf(format, e.runnerName)
}
|
||||
|
||||
func (r *Client) IsRunnerBusy(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
||||
runners, err := r.ListRunners(ctx, enterprise, org, repo)
|
||||
if err != nil {
|
||||
@@ -318,6 +326,9 @@ func (r *Client) IsRunnerBusy(ctx context.Context, enterprise, org, repo, name s
|
||||
|
||||
for _, runner := range runners {
|
||||
if runner.GetName() == name {
|
||||
if runner.GetStatus() == "offline" {
|
||||
return false, &RunnerOffline{runnerName: name}
|
||||
}
|
||||
return runner.GetBusy(), nil
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user