mirror of
https://github.com/actions/actions-runner-controller.git
synced 2025-12-20 06:56:51 +00:00
Fix overscaling when the controller is much faster then the listener (#3371)
Co-authored-by: Francesco Renzi <rentziass@gmail.com>
This commit is contained in:
@@ -34,7 +34,7 @@ type Listener interface {
|
||||
//go:generate mockery --name Worker --output ./mocks --outpkg mocks --case underscore
|
||||
type Worker interface {
|
||||
HandleJobStarted(ctx context.Context, jobInfo *actions.JobStarted) error
|
||||
HandleDesiredRunnerCount(ctx context.Context, count int) (int, error)
|
||||
HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error)
|
||||
}
|
||||
|
||||
func New(config config.Config) (*App, error) {
|
||||
|
||||
@@ -15,23 +15,23 @@ type Worker struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count
|
||||
func (_m *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
|
||||
ret := _m.Called(ctx, count)
|
||||
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count, acquireCount
|
||||
func (_m *Worker) HandleDesiredRunnerCount(ctx context.Context, count int, acquireCount int) (int, error) {
|
||||
ret := _m.Called(ctx, count, acquireCount)
|
||||
|
||||
var r0 int
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int) (int, error)); ok {
|
||||
return rf(ctx, count)
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int, int) (int, error)); ok {
|
||||
return rf(ctx, count, acquireCount)
|
||||
}
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int) int); ok {
|
||||
r0 = rf(ctx, count)
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int, int) int); ok {
|
||||
r0 = rf(ctx, count, acquireCount)
|
||||
} else {
|
||||
r0 = ret.Get(0).(int)
|
||||
}
|
||||
|
||||
if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
|
||||
r1 = rf(ctx, count)
|
||||
if rf, ok := ret.Get(1).(func(context.Context, int, int) error); ok {
|
||||
r1 = rf(ctx, count, acquireCount)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
@@ -114,7 +114,7 @@ func New(config Config) (*Listener, error) {
|
||||
//go:generate mockery --name Handler --output ./mocks --outpkg mocks --case underscore
|
||||
type Handler interface {
|
||||
HandleJobStarted(ctx context.Context, jobInfo *actions.JobStarted) error
|
||||
HandleDesiredRunnerCount(ctx context.Context, count int) (int, error)
|
||||
HandleDesiredRunnerCount(ctx context.Context, count, jobsCompleted int) (int, error)
|
||||
}
|
||||
|
||||
// Listen listens for incoming messages and handles them using the provided handler.
|
||||
@@ -145,7 +145,7 @@ func (l *Listener) Listen(ctx context.Context, handler Handler) error {
|
||||
}
|
||||
l.metrics.PublishStatistics(initialMessage.Statistics)
|
||||
|
||||
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, initialMessage.Statistics.TotalAssignedJobs)
|
||||
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, initialMessage.Statistics.TotalAssignedJobs, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("handling initial message failed: %w", err)
|
||||
}
|
||||
@@ -207,7 +207,7 @@ func (l *Listener) handleMessage(ctx context.Context, handler Handler, msg *acti
|
||||
l.metrics.PublishJobStarted(jobStarted)
|
||||
}
|
||||
|
||||
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, parsedMsg.statistics.TotalAssignedJobs)
|
||||
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, parsedMsg.statistics.TotalAssignedJobs, len(parsedMsg.jobsCompleted))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to handle desired runner count: %w", err)
|
||||
}
|
||||
@@ -284,7 +284,6 @@ func (l *Listener) getMessage(ctx context.Context) (*actions.RunnerScaleSetMessa
|
||||
}
|
||||
|
||||
return msg, nil
|
||||
|
||||
}
|
||||
|
||||
func (l *Listener) deleteLastMessage(ctx context.Context) error {
|
||||
|
||||
@@ -427,7 +427,7 @@ func TestListener_Listen(t *testing.T) {
|
||||
|
||||
var called bool
|
||||
handler := listenermocks.NewHandler(t)
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
|
||||
Return(0, nil).
|
||||
Run(
|
||||
func(mock.Arguments) {
|
||||
@@ -485,11 +485,11 @@ func TestListener_Listen(t *testing.T) {
|
||||
config.Client = client
|
||||
|
||||
handler := listenermocks.NewHandler(t)
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
|
||||
Return(0, nil).
|
||||
Once()
|
||||
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
|
||||
Return(0, nil).
|
||||
Once()
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ func TestInitialMetrics(t *testing.T) {
|
||||
config.Client = client
|
||||
|
||||
handler := listenermocks.NewHandler(t)
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, sessionStatistics.TotalAssignedJobs).
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, sessionStatistics.TotalAssignedJobs, 0).
|
||||
Return(sessionStatistics.TotalAssignedJobs, nil).
|
||||
Once()
|
||||
|
||||
@@ -178,7 +178,7 @@ func TestHandleMessageMetrics(t *testing.T) {
|
||||
|
||||
handler := listenermocks.NewHandler(t)
|
||||
handler.On("HandleJobStarted", mock.Anything, jobsStarted[0]).Return(nil).Once()
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).Return(desiredResult, nil).Once()
|
||||
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 2).Return(desiredResult, nil).Once()
|
||||
|
||||
client := listenermocks.NewClient(t)
|
||||
client.On("DeleteMessage", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()
|
||||
|
||||
@@ -15,23 +15,23 @@ type Handler struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count
|
||||
func (_m *Handler) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
|
||||
ret := _m.Called(ctx, count)
|
||||
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count, jobsCompleted
|
||||
func (_m *Handler) HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error) {
|
||||
ret := _m.Called(ctx, count, jobsCompleted)
|
||||
|
||||
var r0 int
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int) (int, error)); ok {
|
||||
return rf(ctx, count)
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int, int) (int, error)); ok {
|
||||
return rf(ctx, count, jobsCompleted)
|
||||
}
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int) int); ok {
|
||||
r0 = rf(ctx, count)
|
||||
if rf, ok := ret.Get(0).(func(context.Context, int, int) int); ok {
|
||||
r0 = rf(ctx, count, jobsCompleted)
|
||||
} else {
|
||||
r0 = ret.Get(0).(int)
|
||||
}
|
||||
|
||||
if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
|
||||
r1 = rf(ctx, count)
|
||||
if rf, ok := ret.Get(1).(func(context.Context, int, int) error); ok {
|
||||
r1 = rf(ctx, count, jobsCompleted)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
@@ -38,18 +38,20 @@ type Config struct {
|
||||
// The Worker's role is to process the messages it receives from the listener.
|
||||
// It then initiates Kubernetes API requests to carry out the necessary actions.
|
||||
type Worker struct {
|
||||
clientset *kubernetes.Clientset
|
||||
config Config
|
||||
lastPatch int
|
||||
logger *logr.Logger
|
||||
clientset *kubernetes.Clientset
|
||||
config Config
|
||||
lastPatch int
|
||||
lastPatchID int
|
||||
logger *logr.Logger
|
||||
}
|
||||
|
||||
var _ listener.Handler = (*Worker)(nil)
|
||||
|
||||
func New(config Config, options ...Option) (*Worker, error) {
|
||||
w := &Worker{
|
||||
config: config,
|
||||
lastPatch: -1,
|
||||
config: config,
|
||||
lastPatch: -1,
|
||||
lastPatchID: -1,
|
||||
}
|
||||
|
||||
conf, err := rest.InClusterConfig()
|
||||
@@ -161,7 +163,7 @@ func (w *Worker) HandleJobStarted(ctx context.Context, jobInfo *actions.JobStart
|
||||
// The function then scales the ephemeral runner set by applying the merge patch.
|
||||
// Finally, it logs the scaled ephemeral runner set details and returns nil if successful.
|
||||
// If any error occurs during the process, it returns an error with a descriptive message.
|
||||
func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
|
||||
func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error) {
|
||||
// Max runners should always be set by the resource builder either to the configured value,
|
||||
// or the maximum int32 (resourcebuilder.newAutoScalingListener()).
|
||||
targetRunnerCount := min(w.config.MinRunners+count, w.config.MaxRunners)
|
||||
@@ -172,17 +174,22 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int,
|
||||
"min", w.config.MinRunners,
|
||||
"max", w.config.MaxRunners,
|
||||
"currentRunnerCount", w.lastPatch,
|
||||
"jobsCompleted", jobsCompleted,
|
||||
}
|
||||
|
||||
if targetRunnerCount == w.lastPatch {
|
||||
w.logger.Info("Skipping patching of EphemeralRunnerSet as the desired count has not changed", logValues...)
|
||||
if w.lastPatch == targetRunnerCount && jobsCompleted == 0 {
|
||||
w.logger.Info("Skipping patch", logValues...)
|
||||
return targetRunnerCount, nil
|
||||
}
|
||||
|
||||
w.lastPatchID++
|
||||
w.lastPatch = targetRunnerCount
|
||||
|
||||
original, err := json.Marshal(
|
||||
&v1alpha1.EphemeralRunnerSet{
|
||||
Spec: v1alpha1.EphemeralRunnerSetSpec{
|
||||
Replicas: -1,
|
||||
PatchID: -1,
|
||||
},
|
||||
},
|
||||
)
|
||||
@@ -194,6 +201,7 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int,
|
||||
&v1alpha1.EphemeralRunnerSet{
|
||||
Spec: v1alpha1.EphemeralRunnerSetSpec{
|
||||
Replicas: targetRunnerCount,
|
||||
PatchID: w.lastPatchID,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user