1770 update log format and add additional fields to webhook server logs (#1771)

* 1770 update log format and add runID and ID to workflow logs

update tests, change log format for controllers.HorizontalRunnerAutoscalerGitHubWebhook

use logging package

remove unused modules

add setup name to setuplog

add flag to change log format

change flag name to enableProdLogConfig

move log opts to logger package

remove empty else and reset timeEncoder

update flag description

use get function to handle nil

rename flag and update logger function

Update main.go

Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>

Update controllers/horizontal_runner_autoscaler_webhook.go

Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>

Update logging/logger.go

Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>

copy log opts for each NewLogger call

revert to use autoscaler.log

update flag description and remove unused imports

add logFormat to readme

rename setupLog to logger

make fmt

* Fix E2E along the way

Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
malachiobadeyi authored on 2022-11-04 01:46:58 +00:00, committed by GitHub
parent 63e8f32281
commit fbdfe0df8c
12 changed files with 150 additions and 60 deletions


@@ -235,6 +235,8 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons
"repository.owner.type", e.Repo.Owner.GetType(),
"enterprise.slug", enterpriseSlug,
"action", e.GetAction(),
"workflowJob.runID", e.WorkflowJob.GetRunID(),
"workflowJob.ID", e.WorkflowJob.GetID(),
)
}
@@ -343,7 +345,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons
msg := fmt.Sprintf("scaled %s by %d", target.Name, target.Amount)
- autoscaler.Log.Info(msg)
+ log.Info(msg)
if written, err := w.Write([]byte(msg)); err != nil {
log.Error(err, "failed writing http response", "msg", msg, "written", written)


@@ -616,14 +616,14 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
// Scale-up to 2 replicas on first workflow_job.queued webhook event
{
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321))
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
}
// Scale-up to 3 replicas on second workflow_job.queued webhook event
{
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321))
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
}
@@ -631,7 +631,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
// Do not scale-up on third workflow_job.queued webhook event
// repo "example" doesn't match our Spec
{
env.SendWorkflowJobEvent("test", "example", "queued", []string{"self-hosted"})
env.SendWorkflowJobEvent("test", "example", "queued", []string{"self-hosted"}, int64(1234), int64(4321))
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after third webhook event")
env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
}
@@ -1250,7 +1250,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
// Scale-up to 2 replicas on first workflow_job webhook event
{
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"})
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321))
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
@@ -1334,7 +1334,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
// Scale-up to 2 replicas on first workflow_job webhook event
{
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"custom-label"})
env.SendWorkflowJobEvent("test", "valid", "queued", []string{"custom-label"}, int64(1234), int64(4321))
ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
@@ -1415,9 +1415,11 @@ func (env *testEnvironment) SendOrgCheckRunEvent(org, repo, status, action strin
ExpectWithOffset(1, resp.StatusCode).To(Equal(200))
}
- func (env *testEnvironment) SendWorkflowJobEvent(org, repo, statusAndAction string, labels []string) {
+ func (env *testEnvironment) SendWorkflowJobEvent(org, repo, statusAndAction string, labels []string, runID int64, ID int64) {
resp, err := sendWebhook(env.webhookServer, "workflow_job", &github.WorkflowJobEvent{
WorkflowJob: &github.WorkflowJob{
ID: &ID,
RunID: &runID,
Status: &statusAndAction,
Labels: labels,
},


@@ -110,9 +110,23 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
// Note: This logic is here to prevent a dead-lock between ARC and the PV provider.
//
// The author of this logic thinks that some (or all?) of CSI plugins and PV providers
- // do not supported to provision dynamic PVs for a pod that is already marked for deletion.
+ // do not support provisioning dynamic PVs for a pod that is already marked for deletion.
// If we didn't handle this case here, ARC would end up with waiting forever until the
- // PV provider to provision PVs for the pod, which seems to never happen.
+ // PV provider(s) provision PVs for the pod, which seems to never happen.
+ //
+ // For reference, the below is an example of pod.status that you might see when it happened:
+ // status:
+ // conditions:
+ // - lastProbeTime: null
+ // lastTransitionTime: "2022-11-04T00:04:05Z"
+ // message: 'binding rejected: running Bind plugin "DefaultBinder": Operation cannot
+ // be fulfilled on pods/binding "org-runnerdeploy-xv2lg-pm6t2": pod org-runnerdeploy-xv2lg-pm6t2
+ // is being deleted, cannot be assigned to a host'
+ // reason: SchedulerError
+ // status: "False"
+ // type: PodScheduled
+ // phase: Pending
+ // qosClass: BestEffort
log.Info(
"Unregistration started before runner pod gets scheduled onto a node. "+
"Perhaps the runner is taking a long time due to e.g. slow CSI slugin not giving us a PV in a timely manner, or your Kubernetes cluster is overloaded? "+