From 27fec39c655e2fc6f68cf20ac4004de3c6600e41 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Mon, 20 Sep 2021 11:20:54 -0400 Subject: [PATCH 1/4] RemoveFailedPods: guard against nil descheduler strategy (e.g. in case of default that loads all strategies) --- pkg/descheduler/strategies/failedpods.go | 12 +++++++++--- pkg/descheduler/strategies/failedpods_test.go | 8 ++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index e9c7ad23c6..e21e82a79c 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -19,7 +19,7 @@ import ( // validatedFailedPodsStrategyParams contains validated strategy parameters type validatedFailedPodsStrategyParams struct { - *validation.ValidatedStrategyParams + validation.ValidatedStrategyParams includingInitContainers bool reasons sets.String excludeOwnerKinds sets.String @@ -46,9 +46,15 @@ func RemoveFailedPods( evictions.WithLabelSelector(strategyParams.LabelSelector), ) + var labelSelector *metav1.LabelSelector + if strategy.Params != nil { + labelSelector = strategy.Params.LabelSelector + } + for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase=" + string(v1.PodFailed) + pods, err := podutil.ListPodsOnANodeWithFieldSelector( ctx, client, @@ -57,7 +63,7 @@ func RemoveFailedPods( podutil.WithFilter(evictable.IsEvictable), podutil.WithNamespaces(strategyParams.IncludedNamespaces.UnsortedList()), podutil.WithoutNamespaces(strategyParams.ExcludedNamespaces.UnsortedList()), - podutil.WithLabelSelector(strategy.Params.LabelSelector), + podutil.WithLabelSelector(labelSelector), ) if err != nil { klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node)) @@ -103,7 +109,7 @@ func validateAndParseRemoveFailedPodsParams( } return &validatedFailedPodsStrategyParams{ - ValidatedStrategyParams: strategyParams, + ValidatedStrategyParams: *strategyParams, includingInitContainers: includingInitContainers, reasons: reasons, excludeOwnerKinds: excludeOwnerKinds, diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/descheduler/strategies/failedpods_test.go index 52f4f05740..c5caf5a43c 100644 --- a/pkg/descheduler/strategies/failedpods_test.go +++ b/pkg/descheduler/strategies/failedpods_test.go @@ -44,6 +44,13 @@ func TestRemoveFailedPods(t *testing.T) { expectedEvictedPodCount int pods []v1.Pod }{ + { + description: "default empty strategy, 0 failures, 0 evictions", + strategy: api.DeschedulerStrategy{}, + nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, + expectedEvictedPodCount: 0, + pods: []v1.Pod{}, // no pods come back with field selector phase=Failed + }, { description: "0 failures, 0 evictions", strategy: createStrategy(true, false, nil, nil, nil, false), @@ -225,6 +232,7 @@ func TestValidRemoveFailedPodsParams(t *testing.T) { params *api.StrategyParameters }{ {name: "validate nil params", params: nil}, + {name: "validate empty params", params: &api.StrategyParameters{}}, {name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ Reasons: []string{"CreateContainerConfigError"}, }}}, From 8e1da960821444ebf4aaef6ab4e4dc0917e55890 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Mon, 20 Sep 2021 18:18:47 -0400 Subject: [PATCH 2/4] e2e TestTopologySpreadConstraint: ensure pods are running before checking for topology spread across domains 
--- test/e2e/e2e_topologyspreadconstraint_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index ad814e0275..efbb6bef91 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -119,6 +119,9 @@ func TestTopologySpreadConstraint(t *testing.T) { t.Fatalf("Pods were not evicted for %s TopologySpreadConstraint", name) } + // Ensure recently evicted Pod are rescheduled and running before asserting for a balanced topology spread + waitForRCPodsRunning(ctx, t, clientSet, rc) + pods, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", tc.labelKey, tc.labelValue)}) if err != nil { t.Errorf("Error listing pods for %s: %v", name, err) From 41fca2ed61223b815ef8872bd7523594fdfd7a03 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Sat, 25 Sep 2021 00:45:41 -0400 Subject: [PATCH 3/4] e2e tests for RemoveFailedPods strategy Fix priority class default --- pkg/descheduler/strategies/failedpods.go | 4 +- .../strategies/validation/strategyparams.go | 12 +- test/e2e/e2e_failedpods_test.go | 154 ++++++++++++++++++ test/e2e/e2e_test.go | 32 ++-- test/e2e/e2e_topologyspreadconstraint_test.go | 19 +-- 5 files changed, 184 insertions(+), 37 deletions(-) create mode 100644 test/e2e/e2e_failedpods_test.go diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index e21e82a79c..74d79956c9 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -90,7 +90,9 @@ func validateAndParseRemoveFailedPodsParams( params *api.StrategyParameters, ) (*validatedFailedPodsStrategyParams, error) { if params == nil { - return &validatedFailedPodsStrategyParams{}, nil + return &validatedFailedPodsStrategyParams{ + ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(), + }, nil } strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params) diff --git a/pkg/descheduler/strategies/validation/strategyparams.go b/pkg/descheduler/strategies/validation/strategyparams.go index cd6d4570af..33446e6fa1 100644 --- a/pkg/descheduler/strategies/validation/strategyparams.go +++ b/pkg/descheduler/strategies/validation/strategyparams.go @@ -22,20 +22,22 @@ type ValidatedStrategyParams struct { NodeFit bool } +func DefaultValidatedStrategyParams() ValidatedStrategyParams { + return ValidatedStrategyParams{ThresholdPriority: utils.SystemCriticalPriority} +} + func ValidateAndParseStrategyParams( ctx context.Context, client clientset.Interface, params *api.StrategyParameters, ) (*ValidatedStrategyParams, error) { - var includedNamespaces, excludedNamespaces sets.String if params == nil { - return &ValidatedStrategyParams{ - IncludedNamespaces: includedNamespaces, - ExcludedNamespaces: excludedNamespaces, - }, nil + defaultValidatedStrategyParams := DefaultValidatedStrategyParams() + return &defaultValidatedStrategyParams, nil } // At most one of include/exclude can be set + var includedNamespaces, excludedNamespaces sets.String if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set") } diff --git a/test/e2e/e2e_failedpods_test.go b/test/e2e/e2e_failedpods_test.go new file mode 100644 index 0000000000..4b4a9665fe --- /dev/null +++ b/test/e2e/e2e_failedpods_test.go @@ -0,0 +1,154 @@ 
+package e2e + +import ( + "context" + batchv1 "k8s.io/api/batch/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "strings" + "testing" + "time" + + deschedulerapi "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/strategies" +) + +var oneHourPodLifetimeSeconds uint = 3600 + +func TestFailedPods(t *testing.T) { + ctx := context.Background() + clientSet, _, stopCh := initializeClient(t) + defer close(stopCh) + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + t.Errorf("Error listing node with %v", err) + } + nodes, _ := splitNodesAndWorkerNodes(nodeList.Items) + t.Log("Creating testing namespace") + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + testCases := map[string]struct { + expectedEvictedCount int + strategyParams *deschedulerapi.StrategyParameters + }{ + "test-failed-pods-nil-strategy": { + expectedEvictedCount: 1, + strategyParams: nil, + }, + "test-failed-pods-default-strategy": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{}, + }, + "test-failed-pods-default-failed-pods": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{}, + }, + }, + "test-failed-pods-reason-unmatched": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{Reasons: []string{"ReasonDoesNotMatch"}}, + }, + }, + "test-failed-pods-min-age-unmet": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{MinPodLifetimeSeconds: &oneHourPodLifetimeSeconds}, + }, + }, + "test-failed-pods-exclude-job-kind": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{ExcludeOwnerKinds: []string{"Job"}}, + }, + }, + } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + job := initFailedJob(name, testNamespace.Namespace) + t.Logf("Creating job %s in %s namespace", job.Name, job.Namespace) + jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) + if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating Job %s: %v", name, err) + } + deletePropagationPolicy := metav1.DeletePropagationForeground + defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) + + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + + t.Logf("Running RemoveFailedPods strategy for %s", name) + strategies.RemoveFailedPods( + ctx, + clientSet, + deschedulerapi.DeschedulerStrategy{ + Enabled: true, + Params: tc.strategyParams, + }, + nodes, + podEvictor, + ) + t.Logf("Finished RemoveFailedPods strategy for %s", name) + + if actualEvictedCount := podEvictor.TotalEvicted(); actualEvictedCount == tc.expectedEvictedCount { + t.Logf("Total of %d Pods were evicted for %s", actualEvictedCount, name) + } else { + t.Errorf("Unexpected number 
of pods have been evicted, got %v, expected %v", actualEvictedCount, tc.expectedEvictedCount) + } + }) + } +} + +func initFailedJob(name, namespace string) *batchv1.Job { + podSpec := MakePodSpec("", nil) + podSpec.Containers[0].Command = []string{"/bin/false"} + podSpec.RestartPolicy = v1.RestartPolicyNever + labelsSet := labels.Set{"test": name, "name": name} + jobBackoffLimit := int32(0) + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelsSet, + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: podSpec, + ObjectMeta: metav1.ObjectMeta{Labels: labelsSet}, + }, + BackoffLimit: &jobBackoffLimit, + }, + } +} + +func waitForJobPodPhase(ctx context.Context, t *testing.T, clientSet clientset.Interface, job *batchv1.Job, phase v1.PodPhase) { + podClient := clientSet.CoreV1().Pods(job.Namespace) + if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { + t.Log(labels.FormatLabels(job.Labels)) + if podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: labels.FormatLabels(job.Labels)}); err != nil { + return false, err + } else { + if len(podList.Items) == 0 { + t.Logf("Job controller has not created Pod for job %s yet", job.Name) + return false, nil + } + for _, pod := range podList.Items { + if pod.Status.Phase != phase { + t.Logf("Pod %v not in %s phase yet, is %v instead", pod.Name, phase, pod.Status.Phase) + return false, nil + } + } + t.Logf("Job %v Pod is in %s phase now", job.Name, phase) + return true, nil + } + }); err != nil { + t.Fatalf("Error waiting for pods in %s phase: %v", phase, err) + } +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 9a3008f2a6..0c1d435d57 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -290,20 +290,7 @@ func TestLowNodeUtilization(t *testing.T) { waitForRCPodsRunning(ctx, t, clientSet, rc) // Run LowNodeUtilization strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("%v", err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podsOnMosttUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, workerNodes[0], podutil.WithFilter(podEvictor.Evictable().IsEvictable)) if err != nil { @@ -1293,3 +1280,20 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) { } return allNodes, workerNodes } + +func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor { + evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) + if err != nil || len(evictionPolicyGroupVersion) == 0 { + t.Fatalf("Error creating eviction policy group: %v", err) + } + return evictions.NewPodEvictor( + clientSet, + evictionPolicyGroupVersion, + false, + 0, + nodes, + true, + false, + false, + ) +} diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index efbb6bef91..d6e1c8b427 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -11,8 +11,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" deschedulerapi "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" 
"sigs.k8s.io/descheduler/pkg/descheduler/strategies" ) @@ -79,22 +77,9 @@ func TestTopologySpreadConstraint(t *testing.T) { defer deleteRC(ctx, t, clientSet, violatorRc) waitForRCPodsRunning(ctx, t, clientSet, violatorRc) - // Run TopologySpreadConstraint strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("Error creating eviction policy group for %s: %v", name, err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + // Run TopologySpreadConstraint strategy t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name) strategies.RemovePodsViolatingTopologySpreadConstraint( ctx, From 12e6bf931da26b45ba8b8d4980d63c7b8094a512 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Tue, 28 Sep 2021 10:55:01 -0400 Subject: [PATCH 4/4] Update Docs, Manifests, and Helm Chart version to 0.22.1 --- README.md | 6 +++--- charts/descheduler/Chart.yaml | 4 ++-- docs/user-guide.md | 1 + kubernetes/cronjob/cronjob.yaml | 2 +- kubernetes/deployment/deployment.yaml | 2 +- kubernetes/job/job.yaml | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e9a23ca034..36dce8aa0d 100644 --- a/README.md +++ b/README.md @@ -103,17 +103,17 @@ See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/ku Run As A Job ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.1' | kubectl apply -f - ``` Run As A CronJob ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.1' | kubectl apply -f - ``` Run As A Deployment ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.1' | kubectl apply -f - ``` ## User Guide diff --git a/charts/descheduler/Chart.yaml b/charts/descheduler/Chart.yaml index b194a1b7ea..dab3328232 100644 --- a/charts/descheduler/Chart.yaml +++ b/charts/descheduler/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: descheduler -version: 0.22.0 -appVersion: 0.22.0 +version: 0.22.1 +appVersion: 0.22.1 description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that. keywords: - kubernetes diff --git a/docs/user-guide.md b/docs/user-guide.md index f4a4881e6d..c7996020d5 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -4,6 +4,7 @@ Starting with descheduler release v0.10.0 container images are available in the Descheduler Version | Container Image | Architectures | ------------------- |-----------------------------------------------------|-------------------------| +v0.22.1 | k8s.gcr.io/descheduler/descheduler:v0.22.1 | AMD64
ARM64<br/>ARMv7 |
 v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64<br/>ARM64<br/>ARMv7 |
 v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64<br/>ARM64<br/>ARMv7 |
 v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64<br/>
ARM64 | diff --git a/kubernetes/cronjob/cronjob.yaml b/kubernetes/cronjob/cronjob.yaml index c38d52efae..8f30b53797 100644 --- a/kubernetes/cronjob/cronjob.yaml +++ b/kubernetes/cronjob/cronjob.yaml @@ -16,7 +16,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume diff --git a/kubernetes/deployment/deployment.yaml b/kubernetes/deployment/deployment.yaml index 478cc69072..82ff868065 100644 --- a/kubernetes/deployment/deployment.yaml +++ b/kubernetes/deployment/deployment.yaml @@ -19,7 +19,7 @@ spec: serviceAccountName: descheduler-sa containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 imagePullPolicy: IfNotPresent command: - "/bin/descheduler" diff --git a/kubernetes/job/job.yaml b/kubernetes/job/job.yaml index 9f28d29767..1c544fee54 100644 --- a/kubernetes/job/job.yaml +++ b/kubernetes/job/job.yaml @@ -14,7 +14,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume
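
Note for reviewers: a minimal policy is enough to exercise the nil-params path that patch 1 guards against and that the new e2e cases in patch 3 cover. The file name and values below are illustrative only and are not part of this series; RemoveFailedPods is simply enabled with no params block, so strategy.Params is nil and the validated defaults (including the system-critical priority threshold from DefaultValidatedStrategyParams) apply.

# policy.yaml - hypothetical example, not included in this patch series
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemoveFailedPods":
    enabled: true
    # no params: before patch 1, building the pod label selector dereferenced
    # strategy.Params (nil) and panicked; with these patches the defaults are used.

Running the descheduler with --policy-config-file pointing at a file like this against a test cluster reproduces the default-policy scenario described in the patch 1 commit message.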