Skip to content

Commit

Permalink
feat: Flag for enabling recording of failed evictions
Browse files Browse the repository at this point in the history
Signed-off-by: Grzegorz Głąb <[email protected]>
  • Loading branch information
grzesuav committed May 22, 2024
1 parent 1c1b1a7 commit 82eff45
Show file tree
Hide file tree
Showing 21 changed files with 115 additions and 31 deletions.
1 change: 1 addition & 0 deletions cmd/descheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
fs.Float64Var(&rs.Tracing.SampleRate, "otel-sample-rate", 1.0, "Sample rate to collect the Traces")
fs.BoolVar(&rs.Tracing.FallbackToNoOpProviderOnError, "otel-fallback-no-op-on-error", false, "Fallback to NoOp Tracer in case of error")
fs.BoolVar(&rs.EnableHTTP2, "enable-http2", false, "If http/2 should be enabled for the metrics and health check")
fs.BoolVar(&rs.RecordEventsForEvictionErrors, "record-events-for-eviction-errors", false, "Set this flag to record events in case of eviction errors")

componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs)

Expand Down
1 change: 1 addition & 0 deletions docs/cli/descheduler.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ descheduler [flags]
--permit-address-sharing If true, SO_REUSEADDR will be used when binding the port. This allows binding to wildcard IPs like 0.0.0.0 and specific IPs in parallel, and it avoids waiting for the kernel to release sockets in TIME_WAIT state. [default=false]
--permit-port-sharing If true, SO_REUSEPORT will be used when binding the port, which allows more than one instance to bind on the same address and port. [default=false]
--policy-config-file string File with descheduler policy configuration.
--record-events-for-eviction-errors Set this flag to record events in case of eviction errors
--secure-port int The port on which to serve HTTPS with authentication and authorization. If 0, don't serve HTTPS at all. (default 10258)
--tls-cert-file string File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If HTTPS serving is enabled, and --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory specified by --cert-dir.
--tls-cipher-suites strings Comma-separated list of cipher suites for the server. If omitted, the default Go cipher suites will be used.
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/componentconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ type DeschedulerConfiguration struct {
// ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver.
// Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information.
ClientConnection componentbaseconfig.ClientConnectionConfiguration

// RecordEventsForEvictionErrors sets event recording in case of eviction error
RecordEventsForEvictionErrors bool
}

type TracingConfiguration struct {
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/componentconfig/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ type DeschedulerConfiguration struct {
// ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver.
// Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information.
ClientConnection componentbaseconfig.ClientConnectionConfiguration `json:"clientConnection,omitempty"`

// RecordEventsForEvictionErrors sets event recording in case of eviction errorGws
RecordEventsForEvictionErrors bool
}

type TracingConfiguration struct {
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
nodes,
!d.rs.DisableMetrics,
d.eventRecorder,
d.rs.RecordEventsForEvictionErrors,
)

d.runProfiles(ctx, client, nodes, podEvictor)
Expand Down
68 changes: 40 additions & 28 deletions pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,17 @@ type (
)

type PodEvictor struct {
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
metricsEnabled bool
eventRecorder events.EventRecorder
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
metricsEnabled bool
eventRecorder events.EventRecorder
recordEventsForEvictionErrors bool
}

func NewPodEvictor(
Expand All @@ -63,6 +64,7 @@ func NewPodEvictor(
nodes []*v1.Node,
metricsEnabled bool,
eventRecorder events.EventRecorder,
recordEventsForEvictionErrors bool,
) *PodEvictor {
nodePodCount := make(nodePodEvictedCount)
namespacePodCount := make(namespacePodEvictCount)
Expand All @@ -72,16 +74,17 @@ func NewPodEvictor(
}

return &PodEvictor{
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
metricsEnabled: metricsEnabled,
eventRecorder: eventRecorder,
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
metricsEnabled: metricsEnabled,
eventRecorder: eventRecorder,
recordEventsForEvictionErrors: recordEventsForEvictionErrors,
}
}

Expand Down Expand Up @@ -152,6 +155,10 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
if pe.recordEventsForEvictionErrors {
reason := extractReason(opts, opts.StrategyName)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod cannot be evicted from %v node by sigs.k8s.io/descheduler: %v", pod.Spec.NodeName, err)
}
return false
}

Expand All @@ -167,19 +174,24 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.dryRun {
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
} else {
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
reason := opts.Reason
if len(reason) == 0 {
reason = opts.StrategyName
if len(reason) == 0 {
reason = "NotSet"
}
}
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName)
reason := extractReason(opts, opts.StrategyName)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return true
}

func extractReason(opts EvictOptions, strategy string) string {
reason := opts.Reason
if len(reason) == 0 {
reason = strategy
if len(reason) == 0 {
reason = "NotSet"
}
}
return reason
}

func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ func TestHighNodeUtilization(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down Expand Up @@ -645,6 +646,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
item.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestLowNodeUtilization(t *testing.T) {
n1NodeName := "n1"
n2NodeName := "n2"
Expand Down Expand Up @@ -895,6 +899,7 @@ func TestLowNodeUtilization(t *testing.T) {
test.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down Expand Up @@ -1067,6 +1072,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
item.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
5 changes: 5 additions & 0 deletions pkg/framework/plugins/podlifetime/pod_lifetime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestPodLifeTime(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
Expand Down Expand Up @@ -565,6 +569,7 @@ func TestPodLifeTime(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func buildTestPodWithImage(podName, node, image string) *v1.Pod {
pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
Expand Down Expand Up @@ -322,6 +326,7 @@ func TestFindDuplicatePods(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

nodeFit := testCase.nodefit
Expand Down Expand Up @@ -771,6 +776,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
5 changes: 5 additions & 0 deletions pkg/framework/plugins/removefailedpods/failedpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

var OneHourInSeconds uint = 3600

func TestRemoveFailedPods(t *testing.T) {
Expand Down Expand Up @@ -384,6 +388,7 @@ func TestRemoveFailedPods(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func initPods(node *v1.Node) []*v1.Pod {
pods := make([]*v1.Pod, 0)

Expand Down Expand Up @@ -353,6 +357,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestPodAntiAffinity(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
Expand Down Expand Up @@ -242,6 +246,7 @@ func TestPodAntiAffinity(t *testing.T) {
test.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
Expand Down Expand Up @@ -368,6 +372,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func createNoScheduleTaint(key, value string, index int) v1.Taint {
return v1.Taint{
Key: "testTaint" + fmt.Sprintf("%v", index),
Expand Down Expand Up @@ -409,6 +413,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestTopologySpreadConstraint(t *testing.T) {
testCases := []struct {
name string
Expand Down Expand Up @@ -1465,6 +1469,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Loading

0 comments on commit 82eff45

Please sign in to comment.