Fix overscaling when the controller is much faster then the listener (#3371)

Co-authored-by: Francesco Renzi <rentziass@gmail.com>
This commit is contained in:
Nikola Jokic
2024-03-20 15:36:12 +01:00
committed by GitHub
parent 46cfbb6ec7
commit 7a643a5107
16 changed files with 262 additions and 158 deletions

View File

@@ -34,7 +34,7 @@ type Listener interface {
//go:generate mockery --name Worker --output ./mocks --outpkg mocks --case underscore
type Worker interface {
HandleJobStarted(ctx context.Context, jobInfo *actions.JobStarted) error
HandleDesiredRunnerCount(ctx context.Context, count int) (int, error)
HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error)
}
func New(config config.Config) (*App, error) {

View File

@@ -15,23 +15,23 @@ type Worker struct {
mock.Mock
}
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count
func (_m *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
ret := _m.Called(ctx, count)
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count, acquireCount
func (_m *Worker) HandleDesiredRunnerCount(ctx context.Context, count int, acquireCount int) (int, error) {
ret := _m.Called(ctx, count, acquireCount)
var r0 int
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, int) (int, error)); ok {
return rf(ctx, count)
if rf, ok := ret.Get(0).(func(context.Context, int, int) (int, error)); ok {
return rf(ctx, count, acquireCount)
}
if rf, ok := ret.Get(0).(func(context.Context, int) int); ok {
r0 = rf(ctx, count)
if rf, ok := ret.Get(0).(func(context.Context, int, int) int); ok {
r0 = rf(ctx, count, acquireCount)
} else {
r0 = ret.Get(0).(int)
}
if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
r1 = rf(ctx, count)
if rf, ok := ret.Get(1).(func(context.Context, int, int) error); ok {
r1 = rf(ctx, count, acquireCount)
} else {
r1 = ret.Error(1)
}

View File

@@ -114,7 +114,7 @@ func New(config Config) (*Listener, error) {
//go:generate mockery --name Handler --output ./mocks --outpkg mocks --case underscore
type Handler interface {
HandleJobStarted(ctx context.Context, jobInfo *actions.JobStarted) error
HandleDesiredRunnerCount(ctx context.Context, count int) (int, error)
HandleDesiredRunnerCount(ctx context.Context, count, jobsCompleted int) (int, error)
}
// Listen listens for incoming messages and handles them using the provided handler.
@@ -145,7 +145,7 @@ func (l *Listener) Listen(ctx context.Context, handler Handler) error {
}
l.metrics.PublishStatistics(initialMessage.Statistics)
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, initialMessage.Statistics.TotalAssignedJobs)
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, initialMessage.Statistics.TotalAssignedJobs, 0)
if err != nil {
return fmt.Errorf("handling initial message failed: %w", err)
}
@@ -207,7 +207,7 @@ func (l *Listener) handleMessage(ctx context.Context, handler Handler, msg *acti
l.metrics.PublishJobStarted(jobStarted)
}
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, parsedMsg.statistics.TotalAssignedJobs)
desiredRunners, err := handler.HandleDesiredRunnerCount(ctx, parsedMsg.statistics.TotalAssignedJobs, len(parsedMsg.jobsCompleted))
if err != nil {
return fmt.Errorf("failed to handle desired runner count: %w", err)
}
@@ -284,7 +284,6 @@ func (l *Listener) getMessage(ctx context.Context) (*actions.RunnerScaleSetMessa
}
return msg, nil
}
func (l *Listener) deleteLastMessage(ctx context.Context) error {

View File

@@ -427,7 +427,7 @@ func TestListener_Listen(t *testing.T) {
var called bool
handler := listenermocks.NewHandler(t)
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
Return(0, nil).
Run(
func(mock.Arguments) {
@@ -485,11 +485,11 @@ func TestListener_Listen(t *testing.T) {
config.Client = client
handler := listenermocks.NewHandler(t)
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
Return(0, nil).
Once()
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 0).
Return(0, nil).
Once()

View File

@@ -86,7 +86,7 @@ func TestInitialMetrics(t *testing.T) {
config.Client = client
handler := listenermocks.NewHandler(t)
handler.On("HandleDesiredRunnerCount", mock.Anything, sessionStatistics.TotalAssignedJobs).
handler.On("HandleDesiredRunnerCount", mock.Anything, sessionStatistics.TotalAssignedJobs, 0).
Return(sessionStatistics.TotalAssignedJobs, nil).
Once()
@@ -178,7 +178,7 @@ func TestHandleMessageMetrics(t *testing.T) {
handler := listenermocks.NewHandler(t)
handler.On("HandleJobStarted", mock.Anything, jobsStarted[0]).Return(nil).Once()
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything).Return(desiredResult, nil).Once()
handler.On("HandleDesiredRunnerCount", mock.Anything, mock.Anything, 2).Return(desiredResult, nil).Once()
client := listenermocks.NewClient(t)
client.On("DeleteMessage", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()

View File

@@ -15,23 +15,23 @@ type Handler struct {
mock.Mock
}
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count
func (_m *Handler) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
ret := _m.Called(ctx, count)
// HandleDesiredRunnerCount provides a mock function with given fields: ctx, count, jobsCompleted
func (_m *Handler) HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error) {
ret := _m.Called(ctx, count, jobsCompleted)
var r0 int
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, int) (int, error)); ok {
return rf(ctx, count)
if rf, ok := ret.Get(0).(func(context.Context, int, int) (int, error)); ok {
return rf(ctx, count, jobsCompleted)
}
if rf, ok := ret.Get(0).(func(context.Context, int) int); ok {
r0 = rf(ctx, count)
if rf, ok := ret.Get(0).(func(context.Context, int, int) int); ok {
r0 = rf(ctx, count, jobsCompleted)
} else {
r0 = ret.Get(0).(int)
}
if rf, ok := ret.Get(1).(func(context.Context, int) error); ok {
r1 = rf(ctx, count)
if rf, ok := ret.Get(1).(func(context.Context, int, int) error); ok {
r1 = rf(ctx, count, jobsCompleted)
} else {
r1 = ret.Error(1)
}

View File

@@ -38,18 +38,20 @@ type Config struct {
// The Worker's role is to process the messages it receives from the listener.
// It then initiates Kubernetes API requests to carry out the necessary actions.
type Worker struct {
clientset *kubernetes.Clientset
config Config
lastPatch int
logger *logr.Logger
clientset *kubernetes.Clientset
config Config
lastPatch int
lastPatchID int
logger *logr.Logger
}
var _ listener.Handler = (*Worker)(nil)
func New(config Config, options ...Option) (*Worker, error) {
w := &Worker{
config: config,
lastPatch: -1,
config: config,
lastPatch: -1,
lastPatchID: -1,
}
conf, err := rest.InClusterConfig()
@@ -161,7 +163,7 @@ func (w *Worker) HandleJobStarted(ctx context.Context, jobInfo *actions.JobStart
// The function then scales the ephemeral runner set by applying the merge patch.
// Finally, it logs the scaled ephemeral runner set details and returns nil if successful.
// If any error occurs during the process, it returns an error with a descriptive message.
func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int, error) {
func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int, jobsCompleted int) (int, error) {
// Max runners should always be set by the resource builder either to the configured value,
// or the maximum int32 (resourcebuilder.newAutoScalingListener()).
targetRunnerCount := min(w.config.MinRunners+count, w.config.MaxRunners)
@@ -172,17 +174,22 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int,
"min", w.config.MinRunners,
"max", w.config.MaxRunners,
"currentRunnerCount", w.lastPatch,
"jobsCompleted", jobsCompleted,
}
if targetRunnerCount == w.lastPatch {
w.logger.Info("Skipping patching of EphemeralRunnerSet as the desired count has not changed", logValues...)
if w.lastPatch == targetRunnerCount && jobsCompleted == 0 {
w.logger.Info("Skipping patch", logValues...)
return targetRunnerCount, nil
}
w.lastPatchID++
w.lastPatch = targetRunnerCount
original, err := json.Marshal(
&v1alpha1.EphemeralRunnerSet{
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: -1,
PatchID: -1,
},
},
)
@@ -194,6 +201,7 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) (int,
&v1alpha1.EphemeralRunnerSet{
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: targetRunnerCount,
PatchID: w.lastPatchID,
},
},
)