feat: Prevent blocking on transient runner registration failure (#297)

This enhances the controller to recreate the runner pod if the corresponding runner has failed to register itself to GitHub within 10 minutes(currently hard-coded).

It should alleviate #288 in case the root cause is some kind of transient failures(network unreliability, GitHub down, temporarly compute resource shortage, etc).

Formerly you had to manually detect and delete such pods or even force-delete corresponding runners to unblock the controller.

Since this enhancement, the controller does the pod deletion automatically after 10 minutes after pod creation, which result in the controller create another pod that might work.

Ref #288
This commit is contained in:
Yusuke Kuoka
2021-02-09 10:17:52 +09:00
committed by GitHub
parent 9301409aec
commit bbb036e732
3 changed files with 141 additions and 47 deletions

View File

@@ -124,7 +124,7 @@ func (c *Client) RemoveRunner(ctx context.Context, enterprise, org, repo string,
res, err := c.removeRunner(ctx, enterprise, owner, repo, runnerID)
if err != nil {
return fmt.Errorf("failed to remove runner: %v", err)
return fmt.Errorf("failed to remove runner: %w", err)
}
if res.StatusCode != 204 {
@@ -149,7 +149,7 @@ func (c *Client) ListRunners(ctx context.Context, enterprise, org, repo string)
list, res, err := c.listRunners(ctx, enterprise, owner, repo, &opts)
if err != nil {
return runners, fmt.Errorf("failed to list runners: %v", err)
return runners, fmt.Errorf("failed to list runners: %w", err)
}
runners = append(runners, list.Runners...)
@@ -282,3 +282,26 @@ func getEnterpriseApiUrl(baseURL string) (string, error) {
// Trim trailing slash, otherwise there's double slash added to token endpoint
return fmt.Sprintf("%s://%s%s", baseEndpoint.Scheme, baseEndpoint.Host, strings.TrimSuffix(baseEndpoint.Path, "/")), nil
}
type RunnerNotFound struct {
runnerName string
}
func (e RunnerNotFound) Error() string {
return fmt.Sprintf("runner %q not found", e.runnerName)
}
func (r *Client) IsRunnerBusy(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
runners, err := r.ListRunners(ctx, enterprise, org, repo)
if err != nil {
return false, err
}
for _, runner := range runners {
if runner.GetName() == name {
return runner.GetBusy(), nil
}
}
return false, RunnerNotFound{runnerName: name}
}