From eb1146463dc1e63af3b13859b5193fcb53415813 Mon Sep 17 00:00:00 2001 From: grzesuav Date: Sat, 2 Dec 2023 14:37:48 +0100 Subject: [PATCH] feat: Flag for enabling recording of failed evictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Grzegorz Głąb --- cmd/descheduler/app/options/options.go | 1 + pkg/apis/componentconfig/types.go | 3 + pkg/descheduler/descheduler.go | 1 + pkg/descheduler/evictions/evictions.go | 66 +++++++++++-------- .../highnodeutilization_test.go | 2 + .../lownodeutilization_test.go | 6 ++ .../plugins/podlifetime/pod_lifetime_test.go | 5 ++ .../removeduplicates/removeduplicates_test.go | 6 ++ .../removefailedpods/failedpods_test.go | 5 ++ .../toomanyrestarts_test.go | 5 ++ .../pod_antiaffinity_test.go | 5 ++ .../node_affinity_test.go | 5 ++ .../node_taint_test.go | 5 ++ .../topologyspreadconstraint_test.go | 5 ++ pkg/framework/profile/profile_test.go | 10 ++- 15 files changed, 100 insertions(+), 30 deletions(-) diff --git a/cmd/descheduler/app/options/options.go b/cmd/descheduler/app/options/options.go index 384afbda27..3cea6f9417 100644 --- a/cmd/descheduler/app/options/options.go +++ b/cmd/descheduler/app/options/options.go @@ -102,6 +102,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) { fs.Float64Var(&rs.Tracing.SampleRate, "otel-sample-rate", 1.0, "Sample rate to collect the Traces") fs.BoolVar(&rs.Tracing.FallbackToNoOpProviderOnError, "otel-fallback-no-op-on-error", false, "Fallback to NoOp Tracer in case of error") fs.BoolVar(&rs.EnableHTTP2, "enable-http2", false, "If http/2 should be enabled for the metrics and health check") + fs.BoolVar(&rs.RecordEventsForEvictionErrors, "record-events-for-eviction-errors", false, "Set this flag to record events in case of eviction errors") componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs) diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 328eed2138..b02e778b30 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -63,6 +63,9 @@ type DeschedulerConfiguration struct { // ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver. // Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information. ClientConnection componentbaseconfig.ClientConnectionConfiguration + + // RecordEventsForEvictionErrors sets event recording in case of eviction error + RecordEventsForEvictionErrors bool } type TracingConfiguration struct { diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 8d87416ad0..baadb719c8 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -162,6 +162,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node) nodes, !d.rs.DisableMetrics, d.eventRecorder, + d.rs.RecordEventsForEvictionErrors, ) d.runProfiles(ctx, client, nodes, podEvictor) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 381f935ef1..50e8522abb 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -42,16 +42,17 @@ type ( ) type PodEvictor struct { - client clientset.Interface - nodes []*v1.Node - policyGroupVersion string - dryRun bool - maxPodsToEvictPerNode *uint - maxPodsToEvictPerNamespace *uint - nodepodCount nodePodEvictedCount - namespacePodCount namespacePodEvictCount - metricsEnabled bool - eventRecorder events.EventRecorder + client clientset.Interface + nodes []*v1.Node + policyGroupVersion string + dryRun bool + maxPodsToEvictPerNode *uint + maxPodsToEvictPerNamespace *uint + nodepodCount nodePodEvictedCount + namespacePodCount namespacePodEvictCount + metricsEnabled bool + eventRecorder events.EventRecorder + recordEventsForEvictionErrors bool } func NewPodEvictor( @@ -63,6 +64,7 @@ func NewPodEvictor( nodes []*v1.Node, metricsEnabled bool, eventRecorder events.EventRecorder, + recordEventsForEvictionErrors bool, ) *PodEvictor { nodePodCount := make(nodePodEvictedCount) namespacePodCount := make(namespacePodEvictCount) @@ -72,16 +74,17 @@ func NewPodEvictor( } return &PodEvictor{ - client: client, - nodes: nodes, - policyGroupVersion: policyGroupVersion, - dryRun: dryRun, - maxPodsToEvictPerNode: maxPodsToEvictPerNode, - maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace, - nodepodCount: nodePodCount, - namespacePodCount: namespacePodCount, - metricsEnabled: metricsEnabled, - eventRecorder: eventRecorder, + client: client, + nodes: nodes, + policyGroupVersion: policyGroupVersion, + dryRun: dryRun, + maxPodsToEvictPerNode: maxPodsToEvictPerNode, + maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace, + nodepodCount: nodePodCount, + namespacePodCount: namespacePodCount, + metricsEnabled: metricsEnabled, + eventRecorder: eventRecorder, + recordEventsForEvictionErrors: recordEventsForEvictionErrors, } } @@ -153,6 +156,10 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } + if pe.recordEventsForEvictionErrors { + reason := extractReason(opts, strategy) + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod cannot be evicted from %v node by sigs.k8s.io/descheduler: %v", pod.Spec.NodeName, err) + } return false } @@ -169,18 +176,23 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName) } else { klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName) - reason := opts.Reason - if len(reason) == 0 { - reason = strategy - if len(reason) == 0 { - reason = "NotSet" - } - } + reason := extractReason(opts, strategy) pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName) } return true } +func extractReason(opts EvictOptions, strategy string) string { + reason := opts.Reason + if len(reason) == 0 { + reason = strategy + if len(reason) == 0 { + reason = "NotSet" + } + } + return reason +} + func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error { deleteOptions := &metav1.DeleteOptions{} // GracePeriodSeconds ? diff --git a/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go index bfe55594ee..bd84cdc2f4 100644 --- a/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go @@ -493,6 +493,7 @@ func TestHighNodeUtilization(t *testing.T) { testCase.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ @@ -645,6 +646,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) { item.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go index 63ade79f0b..fbadb26f31 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go @@ -41,6 +41,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestLowNodeUtilization(t *testing.T) { n1NodeName := "n1" n2NodeName := "n2" @@ -895,6 +899,7 @@ func TestLowNodeUtilization(t *testing.T) { test.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{ @@ -1067,6 +1072,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) { item.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/podlifetime/pod_lifetime_test.go b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go index 086acf3abb..338ebe4c64 100644 --- a/pkg/framework/plugins/podlifetime/pod_lifetime_test.go +++ b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go @@ -36,6 +36,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestPodLifeTime(t *testing.T) { node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) @@ -354,6 +358,7 @@ func TestPodLifeTime(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removeduplicates/removeduplicates_test.go b/pkg/framework/plugins/removeduplicates/removeduplicates_test.go index 6a14ba878c..e2e62df1a7 100644 --- a/pkg/framework/plugins/removeduplicates/removeduplicates_test.go +++ b/pkg/framework/plugins/removeduplicates/removeduplicates_test.go @@ -39,6 +39,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func buildTestPodWithImage(podName, node, image string) *v1.Pod { pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef) pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{ @@ -322,6 +326,7 @@ func TestFindDuplicatePods(t *testing.T) { testCase.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) nodeFit := testCase.nodefit @@ -771,6 +776,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { testCase.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removefailedpods/failedpods_test.go b/pkg/framework/plugins/removefailedpods/failedpods_test.go index 0179fbca5b..33f711440d 100644 --- a/pkg/framework/plugins/removefailedpods/failedpods_test.go +++ b/pkg/framework/plugins/removefailedpods/failedpods_test.go @@ -36,6 +36,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + var OneHourInSeconds uint = 3600 func TestRemoveFailedPods(t *testing.T) { @@ -298,6 +302,7 @@ func TestRemoveFailedPods(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go index 45dfd9d56a..71d235f3d9 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go @@ -37,6 +37,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func initPods(node *v1.Node) []*v1.Pod { pods := make([]*v1.Pod, 0) @@ -317,6 +321,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go index 9da0dc6b18..b5d7b10f95 100644 --- a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go +++ b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go @@ -37,6 +37,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestPodAntiAffinity(t *testing.T) { node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) { node.ObjectMeta.Labels = map[string]string{ @@ -242,6 +246,7 @@ func TestPodAntiAffinity(t *testing.T) { test.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go index 9cf0cdd2a2..c37d68641d 100644 --- a/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go +++ b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go @@ -36,6 +36,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestRemovePodsViolatingNodeAffinity(t *testing.T) { nodeLabelKey := "kubernetes.io/desiredNode" nodeLabelValue := "yes" @@ -368,6 +372,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go index 7eeabd4d29..befcf3b3a1 100644 --- a/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go +++ b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go @@ -38,6 +38,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func createNoScheduleTaint(key, value string, index int) v1.Taint { return v1.Taint{ Key: "testTaint" + fmt.Sprintf("%v", index), @@ -360,6 +364,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go index 67aab19c2d..dac6300008 100644 --- a/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go +++ b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go @@ -27,6 +27,10 @@ import ( "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestTopologySpreadConstraint(t *testing.T) { testCases := []struct { name string @@ -1465,6 +1469,7 @@ func TestTopologySpreadConstraint(t *testing.T) { tc.nodes, false, eventRecorder, + noRecordEventsForEvictionFailures, ) defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{ diff --git a/pkg/framework/profile/profile_test.go b/pkg/framework/profile/profile_test.go index 21331c9b4b..87ebef68c9 100644 --- a/pkg/framework/profile/profile_test.go +++ b/pkg/framework/profile/profile_test.go @@ -28,6 +28,10 @@ import ( testutils "sigs.k8s.io/descheduler/test" ) +const ( + noRecordEventsForEvictionFailures = false +) + func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) { tests := []struct { name string @@ -244,7 +248,7 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) { eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient) defer eventBroadcaster.Shutdown() - podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder) + podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures) prfl, err := NewProfile( test.config, @@ -393,7 +397,7 @@ func TestProfileExtensionPoints(t *testing.T) { eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient) defer eventBroadcaster.Shutdown() - podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder) + podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures) prfl, err := NewProfile( api.DeschedulerProfile{ @@ -605,7 +609,7 @@ func TestProfileExtensionPointOrdering(t *testing.T) { eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient) defer eventBroadcaster.Shutdown() - podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder) + podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures) prfl, err := NewProfile( api.DeschedulerProfile{